diff --git a/api/v1alpha1/api.go b/api/v1alpha1/api.go index 132a715ff4..06ace550b8 100644 --- a/api/v1alpha1/api.go +++ b/api/v1alpha1/api.go @@ -277,6 +277,16 @@ type AIGatewayRouteRuleBackendRef struct { // +kubebuilder:validation:Minimum=0 // +kubebuilder:default=1 Weight *int32 `json:"weight,omitempty"` + // Priority is the priority of the AIServiceBackend. This sets the priority on the underlying endpoints. + // See: https://www.envoyproxy.io/docs/envoy/latest/intro/arch_overview/upstream/load_balancing/priority + // Note: This will override the `fallback` property of the underlying Envoy Gateway Backend + // + // Default is 0. + // + // +optional + // +kubebuilder:validation:Minimum=0 + // +kubebuilder:default=0 + Priority *uint32 `json:"priority,omitempty"` } type AIGatewayRouteRuleMatch struct { diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 33b844d7a9..372b6fbba4 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -172,6 +172,11 @@ func (in *AIGatewayRouteRuleBackendRef) DeepCopyInto(out *AIGatewayRouteRuleBack *out = new(int32) **out = **in } + if in.Priority != nil { + in, out := &in.Priority, &out.Priority + *out = new(uint32) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AIGatewayRouteRuleBackendRef. 
diff --git a/cmd/aigw/testdata/translate_basic.out.yaml b/cmd/aigw/testdata/translate_basic.out.yaml index f86d03cf2a..1711a6db73 100644 --- a/cmd/aigw/testdata/translate_basic.out.yaml +++ b/cmd/aigw/testdata/translate_basic.out.yaml @@ -66,6 +66,8 @@ kind: HTTPRoute metadata: name: envoy-ai-gateway-basic namespace: default + annotations: + gateway.envoyproxy.io/backend-ref-priority: 0:envoy-ai-gateway-basic-openai:0,1:envoy-ai-gateway-basic-aws:0,2:envoy-ai-gateway-basic-testupstream:0 ownerReferences: - apiVersion: aigateway.envoyproxy.io/v1alpha1 blockOwnerDeletion: true diff --git a/examples/provider_fallback/base.yaml b/examples/provider_fallback/base.yaml index b695ae41d1..e1a747915d 100644 --- a/examples/provider_fallback/base.yaml +++ b/examples/provider_fallback/base.yaml @@ -42,7 +42,9 @@ spec: value: us.meta.llama3-2-1b-instruct-v1:0 backendRefs: - name: provider-fallback-always-failing-upstream # This is the primary backend and trying to speak TLS, which always fails. + priority: 0 - name: provider-fallback-aws + priority: 1 --- apiVersion: aigateway.envoyproxy.io/v1alpha1 kind: AIServiceBackend @@ -80,8 +82,6 @@ metadata: name: provider-fallback-aws namespace: default spec: - # Indicate that this backend is a fallback backend, meaning that it will only be used if the primary backend fails. 
- fallback: true endpoints: - fqdn: hostname: bedrock-runtime.us-east-1.amazonaws.com diff --git a/internal/controller/ai_gateway_route.go b/internal/controller/ai_gateway_route.go index 30f06320f8..c396f05cd7 100644 --- a/internal/controller/ai_gateway_route.go +++ b/internal/controller/ai_gateway_route.go @@ -9,6 +9,7 @@ import ( "cmp" "context" "fmt" + "strings" egv1a1 "github.com/envoyproxy/gateway/api/v1alpha1" "github.com/go-logr/logr" @@ -28,12 +29,16 @@ import ( ) const ( - managedByLabel = "app.kubernetes.io/managed-by" - selectedRouteHeaderKey = "x-ai-eg-selected-route" - hostRewriteHTTPFilterName = "ai-eg-host-rewrite" - aigatewayUUIDAnnotationKey = "aigateway.envoyproxy.io/uuid" - egOwningGatewayNameLabel = "gateway.envoyproxy.io/owning-gateway-name" - egOwningGatewayNamespaceLabel = "gateway.envoyproxy.io/owning-gateway-namespace" + managedByLabel = "app.kubernetes.io/managed-by" + selectedRouteHeaderKey = "x-ai-eg-selected-route" + hostRewriteHTTPFilterName = "ai-eg-host-rewrite" + aigatewayUUIDAnnotationKey = "aigateway.envoyproxy.io/uuid" + // We use this annotation to ensure that Envoy Gateway reconciles the HTTPRoute when the backend refs change. + // This will result in metadata being added to the underlying Envoy route + // @see https://gateway.envoyproxy.io/contributions/design/metadata/ + httpRouteBackendRefPriorityAnnotationKey = "gateway.envoyproxy.io/backend-ref-priority" + egOwningGatewayNameLabel = "gateway.envoyproxy.io/owning-gateway-name" + egOwningGatewayNamespaceLabel = "gateway.envoyproxy.io/owning-gateway-namespace" // apiKeyInSecret is the key to store OpenAI API key. apiKeyInSecret = "apiKey" ) @@ -227,6 +232,12 @@ func (c *AIGatewayRouteController) newHTTPRoute(ctx context.Context, dst *gwapiv dst.Spec.Rules = rules + if dst.ObjectMeta.Annotations == nil { + dst.ObjectMeta.Annotations = make(map[string]string) + } + // HACK: We need to set an annotation so that Envoy Gateway reconciles the HTTPRoute when the backend refs change. 
+ dst.ObjectMeta.Annotations[httpRouteBackendRefPriorityAnnotationKey] = buildPriorityAnnotation(aiGatewayRoute.Spec.Rules) + targetRefs := aiGatewayRoute.Spec.TargetRefs egNs := gwapiv1.Namespace(aiGatewayRoute.Namespace) parentRefs := make([]gwapiv1.ParentReference, len(targetRefs)) @@ -276,3 +287,19 @@ func (c *AIGatewayRouteController) updateAIGatewayRouteStatus(ctx context.Contex c.logger.Error(err, "failed to update AIGatewayRoute status") } } + +// Build an annotation that contains the priority of each backend ref. This is used to ensure Envoy Gateway reconciles the +// HTTP route when the priorities change. +func buildPriorityAnnotation(rules []aigv1a1.AIGatewayRouteRule) string { + priorities := make([]string, 0, len(rules)) + for i, rule := range rules { + for _, br := range rule.BackendRefs { + var priority uint32 + if br.Priority != nil { + priority = *br.Priority + } + priorities = append(priorities, fmt.Sprintf("%d:%s:%d", i, br.Name, priority)) + } + } + return strings.Join(priorities, ",") +} diff --git a/internal/controller/ai_gateway_route_test.go b/internal/controller/ai_gateway_route_test.go index 83c4132b5a..6c25ae7ad5 100644 --- a/internal/controller/ai_gateway_route_test.go +++ b/internal/controller/ai_gateway_route_test.go @@ -176,9 +176,9 @@ func Test_newHTTPRoute(t *testing.T) { }, { BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{ - {Name: "orange", Weight: ptr.To[int32](100)}, - {Name: "apple", Weight: ptr.To[int32](100)}, - {Name: "pineapple", Weight: ptr.To[int32](100)}, + {Name: "orange", Weight: ptr.To[int32](100), Priority: ptr.To[uint32](0)}, + {Name: "apple", Weight: ptr.To[int32](100), Priority: ptr.To[uint32](1)}, + {Name: "pineapple", Weight: ptr.To[int32](100), Priority: ptr.To[uint32](2)}, }, }, { @@ -316,3 +316,17 @@ func TestAIGatewayRouteController_updateAIGatewayRouteStatus(t *testing.T) { require.Equal(t, "ok", updatedRoute.Status.Conditions[0].Message) require.Equal(t, aigv1a1.ConditionTypeAccepted, 
updatedRoute.Status.Conditions[0].Type) } + +func Test_buildPriorityAnnotation(t *testing.T) { + rules := []aigv1a1.AIGatewayRouteRule{ + { + BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{ + {Name: "orange", Weight: ptr.To[int32](100), Priority: ptr.To[uint32](0)}, + {Name: "apple", Weight: ptr.To[int32](100), Priority: ptr.To[uint32](1)}, + {Name: "pineapple", Weight: ptr.To[int32](100), Priority: ptr.To[uint32](2)}, + }, + }, + } + annotation := buildPriorityAnnotation(rules) + require.Equal(t, "0:orange:0,0:apple:1,0:pineapple:2", annotation) +} diff --git a/internal/extensionserver/extensionserver.go b/internal/extensionserver/extensionserver.go index 1402279f28..49b91015d1 100644 --- a/internal/extensionserver/extensionserver.go +++ b/internal/extensionserver/extensionserver.go @@ -187,6 +187,9 @@ func (s *Server) maybeModifyCluster(cluster *clusterv3.Cluster) { backendRef := httpRouteRule.BackendRefs[i] name := backendRef.Name namespace := aigwRoute.Namespace + if backendRef.Priority != nil { + endpoints.Priority = *backendRef.Priority + } // We populate the same metadata for all endpoints in the LoadAssignment. // This is because currently, an extproc cannot retrieve the endpoint set level metadata. 
for _, endpoint := range endpoints.LbEndpoints { diff --git a/internal/extensionserver/extensionserver_test.go b/internal/extensionserver/extensionserver_test.go index bdd7a3a15b..43507fd6a4 100644 --- a/internal/extensionserver/extensionserver_test.go +++ b/internal/extensionserver/extensionserver_test.go @@ -17,6 +17,7 @@ import ( "github.com/go-logr/logr" "github.com/stretchr/testify/require" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" @@ -108,7 +109,8 @@ func Test_maybeModifyCluster(t *testing.T) { Rules: []aigv1a1.AIGatewayRouteRule{ { BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{ - {Name: "aaa"}, + {Name: "aaa", Priority: ptr.To[uint32](0)}, + {Name: "bbb", Priority: ptr.To[uint32](1)}, }, }, }, @@ -156,6 +158,11 @@ func Test_maybeModifyCluster(t *testing.T) { {}, }, }, + { + LbEndpoints: []*endpointv3.LbEndpoint{ + {}, + }, + }, }, }, } @@ -164,8 +171,10 @@ func Test_maybeModifyCluster(t *testing.T) { s.maybeModifyCluster(cluster) require.Empty(t, buf.String()) - require.Len(t, cluster.LoadAssignment.Endpoints, 1) + require.Len(t, cluster.LoadAssignment.Endpoints, 2) require.Len(t, cluster.LoadAssignment.Endpoints[0].LbEndpoints, 1) + require.Equal(t, uint32(0), cluster.LoadAssignment.Endpoints[0].Priority) + require.Equal(t, uint32(1), cluster.LoadAssignment.Endpoints[1].Priority) md := cluster.LoadAssignment.Endpoints[0].LbEndpoints[0].Metadata require.NotNil(t, md) require.Len(t, md.FilterMetadata, 1) diff --git a/manifests/charts/ai-gateway-crds-helm/templates/aigateway.envoyproxy.io_aigatewayroutes.yaml b/manifests/charts/ai-gateway-crds-helm/templates/aigateway.envoyproxy.io_aigatewayroutes.yaml index fb1229a46c..30a71d4c31 100644 --- a/manifests/charts/ai-gateway-crds-helm/templates/aigateway.envoyproxy.io_aigatewayroutes.yaml +++ b/manifests/charts/ai-gateway-crds-helm/templates/aigateway.envoyproxy.io_aigatewayroutes.yaml @@ 
-302,6 +302,17 @@ spec: description: Name is the name of the AIServiceBackend. minLength: 1 type: string + priority: + default: 0 + description: |- + Priority is the priority of the AIServiceBackend. This sets the priority on the underlying endpoints. + See: https://www.envoyproxy.io/docs/envoy/latest/intro/arch_overview/upstream/load_balancing/priority + Note: This will override the `fallback` property of the underlying Envoy Gateway Backend + + Default is 0. + format: int32 + minimum: 0 + type: integer weight: default: 1 description: |- diff --git a/site/docs/api/api.mdx b/site/docs/api/api.mdx index aabf71c89e..571e77a742 100644 --- a/site/docs/api/api.mdx +++ b/site/docs/api/api.mdx @@ -463,6 +463,12 @@ AIGatewayRouteRuleBackendRef is a reference to a backend with a weight. required="false" defaultValue="1" description="Weight is the weight of the AIServiceBackend. This is exactly the same as the weight in
the BackendRef in the Gateway API. See for the details:
https://gateway-api.sigs.k8s.io/reference/spec/#gateway.networking.k8s.io%2fv1.BackendRef
Default is 1." +/>