Skip to content

Commit d886b15

Browse files
authored
Merge pull request #1820 from zetxqx/modelrerwiteimpl
feat: Implement Model Rewrite and Traffic Splitting Logic
2 parents acd7103 + a109e90 commit d886b15

File tree

19 files changed

+1263
-89
lines changed

19 files changed

+1263
-89
lines changed

apix/v1alpha2/inferencemodelrewrite_types.go

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -57,20 +57,25 @@ type InferenceModelRewriteSpec struct {
5757
// If multiple InferenceModelRewrite resources target the same
5858
// InferencePool, the controller will merge them based on precedence.
5959
//
60-
// **Timestamp Wins:** If two rules from different rewrites all matches,
61-
// the rule from the *oldest*
62-
// InferenceModelRewrite resource (determined by
63-
// metadata.creationTimestamp) will be used.
60+
// Across all rules specified on applicable rewrites, precedence MUST be
61+
// given to the match having an "Exact" model match over a generic match
62+
// (a rule with an empty `matches` array).
63+
//
64+
// If ties still exist across multiple InferenceModelRewrite resources (e.g.
65+
// two rewrites both have an exact match for the same model), matching
66+
// precedence MUST be determined by the oldest resource based on
67+
// creation timestamp.
68+
//
69+
// If ties still exist within a single InferenceModelRewrite resource, the
70+
// FIRST matching rule (in list order) is used.
6471
// +required
6572
Rules []InferenceModelRewriteRule `json:"rules"`
6673
}
6774

6875
// InferenceModelRewriteRule defines the match criteria and corresponding action.
69-
//
70-
// A specific model name can only be matched by one rule across all
71-
// rules attached to the same InferencePool. If multiple rules attempt
72-
// to match the same model name, the oldest rule (by creationTimestamp)
73-
// will be the only one considered valid.
76+
// For details on how precedence is determined across multiple rules and
77+
// InferenceModelRewrite resources, see the "Precedence and Conflict Resolution"
78+
// section in InferenceModelRewriteSpec.
7479
type InferenceModelRewriteRule struct {
7580
// Matches defines the criteria for matching a request.
7681
// If multiple match criteria are specified, a request matches if
@@ -87,7 +92,7 @@ type InferenceModelRewriteRule struct {
8792
// +optional
8893
// +kubebuilder:validation:MinItems=1
8994
//
90-
Targets []TargetModel `json:"split,omitempty"`
95+
Targets []TargetModel `json:"targets,omitempty"`
9196
}
9297

9398
// TargetModel defines a weighted model destination for traffic distribution.

client-go/applyconfiguration/apix/v1alpha2/inferencemodelrewriterule.go

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/charts/inferencepool/templates/rbac.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ metadata:
4646
{{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
4747
rules:
4848
- apiGroups: ["inference.networking.x-k8s.io"]
49-
resources: ["inferenceobjectives"]
49+
resources: ["inferenceobjectives", "inferencemodelrewrites"]
5050
verbs: ["get", "watch", "list"]
5151
- apiGroups: ["{{ (split "/" .Values.inferencePool.apiVersion)._0 }}"]
5252
resources: ["inferencepools"]

config/crd/bases/inference.networking.x-k8s.io_inferencemodelrewrites.yaml

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -74,11 +74,9 @@ spec:
7474
items:
7575
description: |-
7676
InferenceModelRewriteRule defines the match criteria and corresponding action.
77-
78-
A specific model name can only be matched by one rule across all
79-
rules attached to the same InferencePool. If multiple rules attempt
80-
to match the same model name, the oldest rule (by creationTimestamp)
81-
will be the only one considered valid.
77+
For details on how precedence is determined across multiple rules and
78+
InferenceModelRewrite resources, see the "Precedence and Conflict Resolution"
79+
section in InferenceModelRewriteSpec.
8280
properties:
8381
matches:
8482
items:
@@ -110,7 +108,7 @@ spec:
110108
- model
111109
type: object
112110
type: array
113-
split:
111+
targets:
114112
items:
115113
description: TargetModel defines a weighted model destination
116114
for traffic distribution.

docs/proposals/1816-inferenceomodelrewrite/README.md

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -64,20 +64,25 @@ type InferenceModelRewriteSpec struct {
6464
// If multiple InferenceModelRewrite resources target the same
6565
// InferencePool, the controller will merge them based on precedence.
6666
//
67-
// **Timestamp Wins:** If two rules from different rewrite all matches,
68-
// the rule from the *oldest*
69-
// InferenceModelRewrite resource (determined by
70-
// metadata.creationTimestamp) will be used.
67+
// Across all rules specified on applicable rewrites, precedence MUST be
68+
// given to the match having an "Exact" model match over a generic match
69+
// (a rule with an empty `matches` array).
70+
//
71+
// If ties still exist across multiple InferenceModelRewrite resources (e.g.
72+
// two rewrites both have an exact match for the same model), matching
73+
// precedence MUST be determined by the oldest resource based on
74+
// creation timestamp.
75+
//
76+
// If ties still exist within a single InferenceModelRewrite resource, the
77+
// FIRST matching rule (in list order) is used.
7178
// +required
7279
Rules []InferenceModelRewriteRule `json:"rules"`
7380
}
7481

7582
// InferenceModelRewriteRule defines the match criteria and corresponding action.
76-
//
77-
// A specific model name can only be matched by one rule across all
78-
// rewrites attached to the same InferencePool. If multiple rules attempt
79-
// to match the same model name, the oldest rule (by creationTimestamp)
80-
// will be the only one considered valid.
83+
// For details on how precedence is determined across multiple rules and
84+
// InferenceModelRewrite resources, see the "Precedence and Conflict Resolution"
85+
// section in InferenceModelRewriteSpec.
8186
type InferenceModelRewriteRule struct {
8287
// Matches defines the criteria for matching a request.
8388
// If multiple match criteria are specified, a request matches if
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package controller
18+
19+
import (
20+
"context"
21+
"fmt"
22+
23+
"k8s.io/apimachinery/pkg/api/errors"
24+
ctrl "sigs.k8s.io/controller-runtime"
25+
"sigs.k8s.io/controller-runtime/pkg/client"
26+
"sigs.k8s.io/controller-runtime/pkg/event"
27+
"sigs.k8s.io/controller-runtime/pkg/log"
28+
"sigs.k8s.io/controller-runtime/pkg/predicate"
29+
30+
"sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2"
31+
"sigs.k8s.io/gateway-api-inference-extension/pkg/common"
32+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
33+
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
34+
)
35+
36+
type InferenceModelRewriteReconciler struct {
37+
client.Reader
38+
Datastore datastore.Datastore
39+
PoolGKNN common.GKNN
40+
}
41+
42+
func (c *InferenceModelRewriteReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
43+
logger := log.FromContext(ctx).V(logutil.DEFAULT)
44+
ctx = ctrl.LoggerInto(ctx, logger)
45+
46+
logger.Info("Reconciling InferenceModelRewrite")
47+
48+
infModelRewrite := &v1alpha2.InferenceModelRewrite{}
49+
notFound := false
50+
if err := c.Get(ctx, req.NamespacedName, infModelRewrite); err != nil {
51+
if !errors.IsNotFound(err) {
52+
return ctrl.Result{}, fmt.Errorf("unable to get InferenceModelRewrite - %w", err)
53+
}
54+
notFound = true
55+
}
56+
57+
isDeleted := !infModelRewrite.DeletionTimestamp.IsZero()
58+
isPooRefUnmatch := infModelRewrite.Spec.PoolRef == nil ||
59+
infModelRewrite.Spec.PoolRef.Name != v1alpha2.ObjectName(c.PoolGKNN.Name) ||
60+
infModelRewrite.Spec.PoolRef.Group != v1alpha2.Group(c.PoolGKNN.Group)
61+
62+
if notFound || isDeleted || isPooRefUnmatch {
63+
// InferenceModelRewrite object got deleted or changed the referenced pool.
64+
c.Datastore.ModelRewriteDelete(req.NamespacedName)
65+
return ctrl.Result{}, nil
66+
}
67+
68+
// Add or update if the InferenceModelRewrite instance has a creation timestamp older than the existing entry of the model.
69+
logger = logger.WithValues("poolRef", infModelRewrite.Spec.PoolRef)
70+
c.Datastore.ModelRewriteSet(infModelRewrite)
71+
logger.Info("Added/Updated InferenceModelRewrite")
72+
73+
return ctrl.Result{}, nil
74+
}
75+
76+
func (c *InferenceModelRewriteReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager) error {
77+
return ctrl.NewControllerManagedBy(mgr).
78+
For(&v1alpha2.InferenceModelRewrite{}).
79+
WithEventFilter(predicate.Funcs{
80+
CreateFunc: func(e event.CreateEvent) bool { return c.eventPredicate(e.Object.(*v1alpha2.InferenceModelRewrite)) },
81+
UpdateFunc: func(e event.UpdateEvent) bool {
82+
return c.eventPredicate(e.ObjectOld.(*v1alpha2.InferenceModelRewrite)) || c.eventPredicate(e.ObjectNew.(*v1alpha2.InferenceModelRewrite))
83+
},
84+
DeleteFunc: func(e event.DeleteEvent) bool { return c.eventPredicate(e.Object.(*v1alpha2.InferenceModelRewrite)) },
85+
GenericFunc: func(e event.GenericEvent) bool { return c.eventPredicate(e.Object.(*v1alpha2.InferenceModelRewrite)) },
86+
}).
87+
Complete(c)
88+
}
89+
90+
func (c *InferenceModelRewriteReconciler) eventPredicate(infModelRewrite *v1alpha2.InferenceModelRewrite) bool {
91+
return infModelRewrite.Spec.PoolRef != nil && string(infModelRewrite.Spec.PoolRef.Name) == c.PoolGKNN.Name && string(infModelRewrite.Spec.PoolRef.Group) == c.PoolGKNN.Group
92+
}

0 commit comments

Comments
 (0)