Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions changelog/fragments/1754346816-fleet-enc-save-retries.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Kind can be one of:
# - breaking-change: a change to previously-documented behavior
# - deprecation: functionality that is being removed in a later release
# - bug-fix: fixes a problem in a previous version
# - enhancement: extends functionality but does not break or fix existing behavior
# - feature: new functionality
# - known-issue: problems that we are aware of in a given version
# - security: impacts on the security of a product or a user’s deployment.
# - upgrade: important information for someone upgrading from a prior version
# - other: does not fit into any of the other categories
kind: bug-fix

# Change summary; a 80ish characters long description of the change.
summary: On Windows, retry saving the Agent information file to disk

# Long description; in case the summary is not enough to describe the change
# this field accommodate a description without length limits.
# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment.
description: >
Saving the Agent information file involves renaming/moving a file to its final
destination. However, on Windows, it is sometimes not possible to rename/move a
file to its destination file because the destination file is locked by another
process (e.g. antivirus software). For such situations, we now retry the save operation
on Windows.
# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc.
component: elastic-agent

# PR URL; optional; the PR number that added the changeset.
# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added.
# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number.
# Please provide it if you are adding a fragment for a different PR.
pr: https://github.com/elastic/elastic-agent/pull/9224

# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of).
# If not present is automatically filled by the tooling with the issue linked to the PR number.
issue: https://github.com/elastic/elastic-agent/issues/5862
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ func (h *PolicyChangeHandler) handlePolicyChange(ctx context.Context, c *config.
}

// persist configuration
err = saveConfig(h.agentInfo, h.config, h.store)
err = saveConfig(h.agentInfo, h.config, h.store, h.log)
if err != nil {
return fmt.Errorf("saving config: %w", err)
}
Expand Down Expand Up @@ -379,7 +379,7 @@ func (h *PolicyChangeHandler) applyLoggingConfig(ctx context.Context, loggingCon
return h.policyLogLevelSetter.SetLogLevel(ctx, policyLogLevel)
}

func saveConfig(agentInfo info.Agent, validatedConfig *configuration.Configuration, store storage.Store) error {
func saveConfig(agentInfo info.Agent, validatedConfig *configuration.Configuration, store storage.Store, log *logger.Logger) error {
if validatedConfig == nil {
// nothing to do for fleet hosts
return nil
Expand All @@ -391,8 +391,7 @@ func saveConfig(agentInfo info.Agent, validatedConfig *configuration.Configurati
errors.TypeUnexpected, errors.M("hosts", validatedConfig.Fleet.Client.Hosts))
}

err = store.Save(reader)
if err != nil {
if err := saveConfigToStore(store, reader, log); err != nil {
return errors.New(
err, "fail to persist new Fleet Server API client hosts",
errors.TypeFilesystem, errors.M("hosts", validatedConfig.Fleet.Client.Hosts))
Expand Down Expand Up @@ -448,7 +447,7 @@ func clientEqual(k1 remote.Config, k2 remote.Config) bool {
return true
}

func fleetToReader(agentID string, headers map[string]string, cfg *configuration.Configuration) (io.Reader, error) {
func fleetToReader(agentID string, headers map[string]string, cfg *configuration.Configuration) (io.ReadSeeker, error) {
configToStore := map[string]interface{}{
"fleet": cfg.Fleet,
"agent": map[string]interface{}{ // Add event logging configuration here!
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License 2.0;
// you may not use this file except in compliance with the Elastic License 2.0.

//go:build !windows

package handlers

import (
"io"

"github.com/elastic/elastic-agent/pkg/core/logger"

"github.com/elastic/elastic-agent/internal/pkg/agent/storage"
)

// saveConfigToStore saves the given configuration (reader) to the given store
func saveConfigToStore(store storage.Store, reader io.Reader, _ *logger.Logger) error {
return store.Save(reader)
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,23 @@ package handlers
import (
"context"
"math"
"math/rand/v2"
"os"
"path/filepath"
"strings"
"testing"
"time"

"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/stretchr/testify/require"

"github.com/elastic/elastic-agent-client/v7/pkg/proto"
"github.com/elastic/elastic-agent-libs/logp"

"github.com/elastic/elastic-agent/internal/pkg/agent/errors"
"github.com/elastic/elastic-agent/internal/pkg/agent/storage"
"github.com/elastic/elastic-agent/pkg/component"

"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/elastic/elastic-agent/pkg/core/logger/loggertest"
)

type testAction struct {
Expand Down Expand Up @@ -215,3 +221,36 @@ func TestProxiedActionsNotifier(t *testing.T) {
})
}
}

func TestSaveConfigToStore(t *testing.T) {
// Create destination file
tmpDir := t.TempDir()
dest := filepath.Join(tmpDir, "dest.txt")
err := os.WriteFile(dest, []byte("existing content"), 0644)
require.NoError(t, err)

// Open handle on destination file and keep it open for a random duration
// between [200ms, 1800ms).
openDuration := time.Duration(200+rand.Int64N(1800-200)) * time.Millisecond
destFile, err := os.Open(dest)
require.NoError(t, err)
time.AfterFunc(openDuration, func() {
destFile.Close()
})
defer destFile.Close()

// Create disk store
store, err := storage.NewDiskStore(dest)
require.NoError(t, err)

// Try to save content to store
reader := strings.NewReader("new content")
log, _ := loggertest.New("test")
err = saveConfigToStore(store, reader, log)
require.NoError(t, err)

// Check that dest file has been replaced with new file
data, err := os.ReadFile(dest)
require.NoError(t, err)
require.Equal(t, "new content", string(data))
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License 2.0;
// you may not use this file except in compliance with the Elastic License 2.0.

//go:build windows

package handlers

import (
"context"
"errors"
"io"
"syscall"
"time"

"github.com/cenkalti/backoff/v4"

"github.com/elastic/elastic-agent/internal/pkg/agent/storage"
"github.com/elastic/elastic-agent/pkg/core/logger"
)

const saveRetryInterval = 50 * time.Millisecond
const saveRetryDuration = 2 * time.Second

// saveConfigToStore saves the given configuration (reader) to the given store.
// On Windows platforms, the save operation is retried if the error is an
// ACCESS_DENIED error, which can happen if the file is locked by another process.
func saveConfigToStore(store storage.Store, reader io.ReadSeeker, log *logger.Logger) error {
ctx, cancel := context.WithTimeout(context.Background(), saveRetryDuration)
defer cancel()

bo := backoff.WithContext(backoff.NewConstantBackOff(saveRetryInterval), ctx)

return backoff.Retry(func() error {
err := store.Save(reader)
if err == nil {
// Save succeeded
return nil
}

if !errors.Is(err, syscall.ERROR_ACCESS_DENIED) {
// Save failed due to an error that is not ACCESS_DENIED. Immediately
// indicate failure without retrying further.
log.Debugf("Saving configuration to store failed: %v. Not retrying.", err)
return backoff.Permanent(err)
}

if _, seekErr := reader.Seek(0, io.SeekStart); seekErr != nil {
log.Debugf("Saving configuration to store failed: %v. Failed to reset reader: %v. Not retrying.", err, seekErr)
return backoff.Permanent(errors.Join(err, seekErr))
}

log.Debugf("Saving configuration to store failed: %v. Retrying...", err)
return err
}, bo)
}
Loading