Fix leaks with metadata processors #16349
The first file in the diff adds an integration test for the add_docker_metadata processor that verifies no goroutines are leaked:

@@ -0,0 +1,120 @@
// Licensed to Elasticsearch B.V. under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Elasticsearch B.V. licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// +build linux darwin windows
// +build integration

package add_docker_metadata

import (
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"

	"github.com/elastic/beats/v7/libbeat/beat"
	"github.com/elastic/beats/v7/libbeat/common"
	"github.com/elastic/beats/v7/libbeat/common/docker"
	"github.com/elastic/beats/v7/libbeat/logp"
	"github.com/elastic/beats/v7/libbeat/processors"
	dockertest "github.com/elastic/beats/v7/libbeat/tests/docker"
	"github.com/elastic/beats/v7/libbeat/tests/resources"
)

func TestAddDockerMetadata(t *testing.T) {
	goroutines := resources.NewGoroutinesChecker()
	defer goroutines.Check(t)

	client, err := docker.NewClient(defaultConfig().Host, nil, nil)
	require.NoError(t, err)

	// Docker clients can affect the goroutines checker because they keep
	// idle keep-alive connections, so we explicitly close them.
	// In principle these idle connections wouldn't represent leaks even if
	// the client is not explicitly closed, because they are eventually closed.
	defer client.Close()

	// Start a container to have some data to enrich events.
	testClient, err := dockertest.NewClient()
	require.NoError(t, err)
	// Explicitly close the client so it doesn't affect the goroutines checker.
	defer testClient.Close()

	image := "busybox"
	cmd := []string{"sleep", "60"}
	labels := map[string]string{"label": "foo"}
	id, err := testClient.ContainerStart(image, cmd, labels)
	require.NoError(t, err)
	defer testClient.ContainerRemove(id)

	info, err := testClient.ContainerInspect(id)
	require.NoError(t, err)
	pid := info.State.Pid

	config, err := common.NewConfigFrom(map[string]interface{}{
		"match_fields": []string{"cid"},
	})
	watcherConstructor := newWatcherWith(client)
	processor, err := buildDockerMetadataProcessor(logp.L(), config, watcherConstructor)
	require.NoError(t, err)

	t.Run("match container by container id", func(t *testing.T) {
		input := &beat.Event{Fields: common.MapStr{
			"cid": id,
		}}
		result, err := processor.Run(input)
		require.NoError(t, err)

		resultLabels, _ := result.Fields.GetValue("container.labels")
		expectedLabels := common.MapStr{"label": "foo"}
		assert.Equal(t, expectedLabels, resultLabels)
		assert.Equal(t, id, result.Fields["cid"])
	})

	t.Run("match container by process id", func(t *testing.T) {
		input := &beat.Event{Fields: common.MapStr{
			"cid":         id,
			"process.pid": pid,
		}}
		result, err := processor.Run(input)
		require.NoError(t, err)

		resultLabels, _ := result.Fields.GetValue("container.labels")
		expectedLabels := common.MapStr{"label": "foo"}
		assert.Equal(t, expectedLabels, resultLabels)
		assert.Equal(t, id, result.Fields["cid"])
	})

	t.Run("don't enrich non existing container", func(t *testing.T) {
		input := &beat.Event{Fields: common.MapStr{
			"cid": "notexists",
		}}
		result, err := processor.Run(input)
		require.NoError(t, err)
		assert.Equal(t, input.Fields, result.Fields)
	})

	err = processors.Close(processor)
	require.NoError(t, err)
}

func newWatcherWith(client docker.Client) docker.WatcherConstructor {
	return func(log *logp.Logger, host string, tls *docker.TLSConfig, storeShortID bool) (docker.Watcher, error) {
		return docker.NewWatcherWithClient(log, client, 60*time.Second, storeShortID)
	}
}
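For readers unfamiliar with resources.NewGoroutinesChecker, the idea behind this kind of leak check is to record the number of running goroutines before the test and fail if the count has not returned to that baseline after the test and its deferred cleanups finish. Below is a minimal sketch of the pattern only; the package and type names mirror the test, but the actual libbeat implementation may track more detail than a simple count:

```go
// Hypothetical sketch of a goroutine leak checker; not the libbeat code.
package resourcessketch

import (
	"runtime"
	"testing"
	"time"
)

// GoroutinesChecker records the goroutine count at construction time and
// later verifies that the count has dropped back to that level.
type GoroutinesChecker struct {
	before  int
	timeout time.Duration
}

func NewGoroutinesChecker() *GoroutinesChecker {
	return &GoroutinesChecker{before: runtime.NumGoroutine(), timeout: 5 * time.Second}
}

// Check fails the test if extra goroutines are still running after a grace
// period, which usually indicates a leaked background worker.
func (c *GoroutinesChecker) Check(t *testing.T) {
	deadline := time.Now().Add(c.timeout)
	for time.Now().Before(deadline) {
		if runtime.NumGoroutine() <= c.before {
			return
		}
		time.Sleep(10 * time.Millisecond)
	}
	t.Errorf("possible goroutine leak: %d goroutines before, %d after",
		c.before, runtime.NumGoroutine())
}
```

The grace period matters: goroutines stopped by deferred Close calls may take a moment to actually exit, so a checker that compared counts immediately would report false positives.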
A second file gives the metadata cache a done channel so that its cleanup goroutine can be stopped:
@@ -31,13 +31,15 @@ type cache struct {
 	timeout  time.Duration
 	deleted  map[string]time.Time // key -> when should this obj be deleted
 	metadata map[string]common.MapStr
+	done     chan struct{}
Review comment on the new done field: should we also wait for the cache shutdown/Close to be 'completed'?

Author (Member) reply: I think that for this processor it is OK not to wait; it only needs to stop a goroutine.

(A sketch of a variant that does wait for completion follows this hunk.)
 }

 func newCache(cleanupTimeout time.Duration) *cache {
 	c := &cache{
 		timeout:  cleanupTimeout,
 		deleted:  make(map[string]time.Time),
 		metadata: make(map[string]common.MapStr),
+		done:     make(chan struct{}),
 	}
 	go c.cleanup()
 	return c
|
@@ -67,15 +69,29 @@ func (c *cache) set(key string, data common.MapStr) { | |
| } | ||
|
|
||
| func (c *cache) cleanup() { | ||
| ticker := time.Tick(timeout) | ||
| for now := range ticker { | ||
| c.Lock() | ||
| for k, t := range c.deleted { | ||
| if now.After(t) { | ||
| delete(c.deleted, k) | ||
| delete(c.metadata, k) | ||
| if timeout <= 0 { | ||
| return | ||
| } | ||
|
|
||
| ticker := time.NewTicker(timeout) | ||
| defer ticker.Stop() | ||
| for { | ||
| select { | ||
| case <-c.done: | ||
| return | ||
| case now := <-ticker.C: | ||
| c.Lock() | ||
| for k, t := range c.deleted { | ||
| if now.After(t) { | ||
| delete(c.deleted, k) | ||
| delete(c.metadata, k) | ||
| } | ||
| } | ||
| c.Unlock() | ||
| } | ||
| c.Unlock() | ||
| } | ||
| } | ||
|
|
||
| func (c *cache) stop() { | ||
| close(c.done) | ||
| } | ||
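Two details of the new cleanup loop are worth noting. First, the old loop ranged over time.Tick, which can never be stopped, so both the underlying ticker and the goroutine ranging over it lived for the lifetime of the process; time.NewTicker plus defer ticker.Stop() releases them once cleanup returns. Second, close(c.done) panics if called twice, so if stop() could ever be invoked more than once it would need a guard such as sync.Once. A small sketch of such a guard, purely as an aside and not something this PR needs, assuming a single done channel:

```go
// Illustrative only: a stop signal that is safe to trigger more than once.
package main

import "sync"

// stopper guards a done channel so that Stop can be called multiple times;
// closing an already-closed channel would panic otherwise.
type stopper struct {
	done chan struct{}
	once sync.Once
}

func newStopper() *stopper {
	return &stopper{done: make(chan struct{})}
}

func (s *stopper) Stop() {
	s.once.Do(func() { close(s.done) })
}

func (s *stopper) Done() <-chan struct{} {
	return s.done
}

func main() {
	s := newStopper()
	s.Stop()
	s.Stop()    // safe: the channel is only closed once
	<-s.Done()  // returns immediately because the channel is closed
}
```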
A third file adds a Close method to the Kubernetes metadata annotator so that closing the processor stops both its watcher and its cache:
@@ -242,6 +242,16 @@ func (k *kubernetesAnnotator) Run(event *beat.Event) (*beat.Event, error) {
 	return event, nil
 }

+func (k *kubernetesAnnotator) Close() error {
+	if k.watcher != nil {
+		k.watcher.Stop()
Review comment: Maybe I missed it, but does the watcher use the cache? If so, does this Stop block until the watcher is done before we continue closing the cache? Instead of channels, consider using context.Context for cancellation. Grouping tasks and waiting for completion can be done e.g. via …

Author (Member) reply: Good catch, the watcher actually uses the cache. But I don't think this is a problem for now: the cache can still be used after being stopped, it just no longer cleans up expired entries. Changing the watcher to use context cancellation instead of Start/Stop would require some refactoring. I will give it a try; let me know if you don't consider it blocking 🙂

Author (Member) follow-up: Sorry, I was wrong, the informer used by …
+	}
+	if k.cache != nil {
+		k.cache.stop()
+	}
+	return nil
+}
+
 func (k *kubernetesAnnotator) addPod(pod *kubernetes.Pod) {
 	metadata := k.indexers.GetMetadata(pod)
 	for _, m := range metadata {
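The reviewer's suggestion of context-based cancellation would look roughly like the sketch below: the watcher derives a cancellable context, and Close cancels it and then waits on a WaitGroup until every background goroutine has returned (a task-group helper such as errgroup could play the same role). This is only an illustration of the suggested approach with invented names; it is not the current Watcher API:

```go
// Illustrative only: context cancellation instead of Start/Stop channels.
package main

import (
	"context"
	"fmt"
	"sync"
	"time"
)

type contextWatcher struct {
	cancel context.CancelFunc
	wg     sync.WaitGroup
}

func startWatcher(parent context.Context) *contextWatcher {
	ctx, cancel := context.WithCancel(parent)
	w := &contextWatcher{cancel: cancel}

	w.wg.Add(1)
	go func() {
		defer w.wg.Done()
		ticker := time.NewTicker(time.Second)
		defer ticker.Stop()
		for {
			select {
			case <-ctx.Done():
				return
			case <-ticker.C:
				// poll for container/pod updates here
			}
		}
	}()
	return w
}

// Close cancels the context and waits for completion, so callers know all
// background work has stopped by the time it returns.
func (w *contextWatcher) Close() error {
	w.cancel()
	w.wg.Wait()
	return nil
}

func main() {
	w := startWatcher(context.Background())
	time.Sleep(100 * time.Millisecond)
	_ = w.Close()
	fmt.Println("watcher stopped")
}
```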