generated from kubernetes/kubernetes-template-project
-
Notifications
You must be signed in to change notification settings - Fork 199
Pluggable metrics collection #1237
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
9 commits
Select commit
Hold shift + click to select a range
045f973
pluggable metrics collection
elevran 1d502d6
use the provided context
elevran fc2519d
derive new context outside anonymous function
elevran b47a4a2
make fatcontext happy?
elevran 24d8571
mock ticker
elevran 0c9475c
split LoRA metric Spec from standard Spec
elevran d2082d6
add collector test
elevran 77a2bca
address review comments
elevran 809ed0c
review comments on use of client
elevran File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,137 @@ | ||
| /* | ||
| Copyright 2025 The Kubernetes Authors. | ||
|
|
||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||
| you may not use this file except in compliance with the License. | ||
| You may obtain a copy of the License at | ||
|
|
||
| http://www.apache.org/licenses/LICENSE-2.0 | ||
|
|
||
| Unless required by applicable law or agreed to in writing, software | ||
| distributed under the License is distributed on an "AS IS" BASIS, | ||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| See the License for the specific language governing permissions and | ||
| limitations under the License. | ||
| */ | ||
|
|
||
| package datalayer | ||
|
|
||
| import ( | ||
| "context" | ||
| "errors" | ||
| "sync" | ||
| "time" | ||
|
|
||
| "sigs.k8s.io/controller-runtime/pkg/log" | ||
|
|
||
| "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" | ||
| ) | ||
|
|
||
| // TODO: | ||
| // currently the data store is expected to manage the state of multiple | ||
| // Collectors (e.g., using sync.Map mapping pod to its Collector). Alternatively, | ||
| // this can be encapsulated in this file, providing the data store with an interface | ||
| // to only update on endpoint addition/change and deletion. This can also be used | ||
| // to centrally track statistics such as errors, active routines, etc. | ||
|
|
||
| const ( | ||
| defaultCollectionTimeout = time.Second | ||
| ) | ||
|
|
||
// Ticker abstracts the time source that drives periodic invocation.
// A Ticker is passed as a parameter to a Collector so that tests can control
// the progress of time, keeping them deterministic and fast.
type Ticker interface {
	// Stop is invoked by the Collector when its collection loop terminates.
	Stop()
	// Channel returns the receive-only channel on which ticks are delivered.
	Channel() <-chan time.Time
}
|
|
||
// TimeTicker is a Ticker backed by the standard library's time.Ticker.
// Embedding *time.Ticker promotes its Stop method and its C channel.
type TimeTicker struct {
	*time.Ticker
}
|
|
||
| // NewTimeTicker returns a new time.Ticker with the configured duration. | ||
| func NewTimeTicker(d time.Duration) Ticker { | ||
| return &TimeTicker{ | ||
| Ticker: time.NewTicker(d), | ||
| } | ||
| } | ||
|
|
||
| // Channel exposes the ticker's channel. | ||
| func (t *TimeTicker) Channel() <-chan time.Time { | ||
| return t.C | ||
| } | ||
|
|
||
// Collector runs the data collection for a single endpoint.
// It embeds sync.Once values and is therefore meant to be used through a
// pointer, as returned by NewCollector.
type Collector struct {
	// goroutine management: each Once lets Start and Stop take effect a single time
	startOnce, stopOnce sync.Once

	// per-endpoint context and its cancellation; both are nil until Start runs
	ctx    context.Context
	cancel context.CancelFunc

	// TODO: optional metrics tracking collection (e.g., errors, invocations, ...)
}
|
|
||
| // NewCollector returns a new collector. | ||
| func NewCollector() *Collector { | ||
| return &Collector{} | ||
| } | ||
|
|
||
| // Start initiates data source collection for the endpoint. | ||
| func (c *Collector) Start(ctx context.Context, ticker Ticker, ep Endpoint, sources []DataSource) error { | ||
elevran marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| started := false | ||
| c.startOnce.Do(func() { | ||
| c.ctx, c.cancel = context.WithCancel(ctx) | ||
| started = true | ||
|
|
||
| go func(endpoint Endpoint, sources []DataSource) { | ||
| logger := log.FromContext(ctx).WithValues("endpoint", ep.GetPod().GetIPAddress()) | ||
| logger.V(logging.DEFAULT).Info("starting collection") | ||
|
|
||
| defer func() { | ||
| logger.V(logging.DEFAULT).Info("terminating collection") | ||
| ticker.Stop() | ||
| }() | ||
|
|
||
| for { | ||
| select { | ||
| case <-c.ctx.Done(): // per endpoint context cancelled | ||
| return | ||
| case <-ticker.Channel(): | ||
| for _, src := range sources { | ||
| ctx, cancel := context.WithTimeout(c.ctx, defaultCollectionTimeout) | ||
| _ = src.Collect(ctx, endpoint) // TODO: track errors per collector? | ||
| cancel() // release the ctx timeout resources | ||
elevran marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } | ||
| } | ||
| } | ||
| }(ep, sources) | ||
| }) | ||
|
|
||
| if !started { | ||
| return errors.New("collector start called multiple times") | ||
| } | ||
| return nil | ||
| } | ||
|
|
||
| // Stop terminates the collector. | ||
| func (c *Collector) Stop() error { | ||
| if c.ctx == nil || c.cancel == nil { | ||
| return errors.New("collector stop called before start") | ||
| } | ||
|
|
||
| stopped := false | ||
| c.stopOnce.Do(func() { | ||
| stopped = true | ||
| c.cancel() | ||
| }) | ||
|
|
||
| if !stopped { | ||
| return errors.New("collector stop called multiple times") | ||
| } | ||
| return nil | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,130 @@ | ||
| /* | ||
| Copyright 2025 The Kubernetes Authors. | ||
|
|
||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||
| you may not use this file except in compliance with the License. | ||
| You may obtain a copy of the License at | ||
|
|
||
| http://www.apache.org/licenses/LICENSE-2.0 | ||
|
|
||
| Unless required by applicable law or agreed to in writing, software | ||
| distributed under the License is distributed on an "AS IS" BASIS, | ||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| See the License for the specific language governing permissions and | ||
| limitations under the License. | ||
| */ | ||
|
|
||
| package datalayer | ||
|
|
||
| import ( | ||
| "context" | ||
| "sync/atomic" | ||
| "testing" | ||
| "time" | ||
|
|
||
| "github.com/stretchr/testify/assert" | ||
| "github.com/stretchr/testify/require" | ||
| corev1 "k8s.io/api/core/v1" | ||
| metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
|
|
||
| "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer/mocks" | ||
| ) | ||
|
|
||
| // --- Test Stubs --- | ||
|
|
||
| type DummySource struct { | ||
| callCount int64 | ||
| } | ||
|
|
||
| func (d *DummySource) Name() string { return "test-dummy-data-source" } | ||
| func (d *DummySource) AddExtractor(_ Extractor) error { return nil } | ||
| func (d *DummySource) Collect(ctx context.Context, ep Endpoint) error { | ||
| atomic.AddInt64(&d.callCount, 1) | ||
| return nil | ||
| } | ||
|
|
||
| func defaultEndpoint() Endpoint { | ||
| ms := NewEndpoint() | ||
| pod := &corev1.Pod{ | ||
| ObjectMeta: metav1.ObjectMeta{ | ||
| Name: "pod-name", | ||
| Namespace: "default", | ||
| }, | ||
| Status: corev1.PodStatus{ | ||
| PodIP: "1.2.3.4", | ||
| }, | ||
| } | ||
| ms.UpdatePod(pod) | ||
| return ms | ||
| } | ||
|
|
||
| // --- Tests --- | ||
|
|
||
| var ( | ||
| endpoint = defaultEndpoint() | ||
| sources = []DataSource{&DummySource{}} | ||
| ) | ||
|
|
||
| func TestCollectorCanStartOnlyOnce(t *testing.T) { | ||
| c := NewCollector() | ||
| ctx := context.Background() | ||
| ticker := mocks.NewTicker() | ||
|
|
||
| err := c.Start(ctx, ticker, endpoint, sources) | ||
| require.NoError(t, err, "first Start call should succeed") | ||
|
|
||
| err = c.Start(ctx, ticker, endpoint, sources) | ||
| assert.Error(t, err, "multiple collector start should error") | ||
| } | ||
|
|
||
| func TestCollectorStopBeforeStartIsAnError(t *testing.T) { | ||
| c := NewCollector() | ||
| err := c.Stop() | ||
| assert.Error(t, err, "collector stop called before start should error") | ||
| } | ||
|
|
||
| func TestCollectorCanStopOnlyOnce(t *testing.T) { | ||
| c := NewCollector() | ||
| ctx := context.Background() | ||
| ticker := mocks.NewTicker() | ||
|
|
||
| require.NoError(t, c.Start(ctx, ticker, endpoint, sources)) | ||
| require.NoError(t, c.Stop(), "first Stop should succeed") | ||
| assert.Error(t, c.Stop(), "second Stop should fail") | ||
| } | ||
|
|
||
| func TestCollectorCollectsOnTicks(t *testing.T) { | ||
| source := &DummySource{} | ||
| c := NewCollector() | ||
| ticker := mocks.NewTicker() | ||
| ctx := context.Background() | ||
| require.NoError(t, c.Start(ctx, ticker, endpoint, []DataSource{source})) | ||
|
|
||
| ticker.Tick() | ||
| ticker.Tick() | ||
| time.Sleep(20 * time.Millisecond) // let collector process the ticks | ||
|
|
||
| got := atomic.LoadInt64(&source.callCount) | ||
| want := int64(2) | ||
| assert.Equal(t, want, got, "call count mismatch") | ||
| require.NoError(t, c.Stop()) | ||
| } | ||
|
|
||
| func TestCollectorStopCancelsContext(t *testing.T) { | ||
| source := &DummySource{} | ||
| c := NewCollector() | ||
| ticker := mocks.NewTicker() | ||
| ctx := context.Background() | ||
|
|
||
| require.NoError(t, c.Start(ctx, ticker, endpoint, []DataSource{source})) | ||
| ticker.Tick() // should be processed | ||
| time.Sleep(20 * time.Millisecond) | ||
|
|
||
| require.NoError(t, c.Stop()) | ||
| before := atomic.LoadInt64(&source.callCount) | ||
|
|
||
| ticker.Tick() | ||
| time.Sleep(20 * time.Millisecond) // let collector run again | ||
| after := atomic.LoadInt64(&source.callCount) | ||
| assert.Equal(t, before, after, "call count changed after stop") | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.