diff --git a/.chloggen/issue-39491.yaml b/.chloggen/issue-39491.yaml new file mode 100644 index 0000000000000..e4938d310f433 --- /dev/null +++ b/.chloggen/issue-39491.yaml @@ -0,0 +1,29 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. receiver/filelog) +component: receiver/filelog + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Suppress repeated permission-denied errors + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [39491] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: | + Only one error is logged per file per process run, and an informational message is emitted when the file becomes readable again. + This reduces log spam and improves clarity for operators. + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [user] diff --git a/pkg/stanza/fileconsumer/file.go b/pkg/stanza/fileconsumer/file.go index 5982cabb63d14..c08bebbd8526a 100644 --- a/pkg/stanza/fileconsumer/file.go +++ b/pkg/stanza/fileconsumer/file.go @@ -5,7 +5,9 @@ package fileconsumer // import "github.com/open-telemetry/opentelemetry-collecto import ( "context" + "errors" "fmt" + "io/fs" "os" "sync" "time" @@ -22,6 +24,12 @@ import ( "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator" ) +const ( + // maxUnreadableEntries limits the number of paths tracked in the unreadable map + // to prevent memory issues when many files have permission errors. + maxUnreadableEntries = 10000 +) + type Manager struct { set component.TelemetrySettings wg sync.WaitGroup @@ -39,12 +47,17 @@ type Manager struct { pollsToArchive int telemetryBuilder *metadata.TelemetryBuilder + + unreadable map[string]struct{} } func (m *Manager) Start(persister operator.Persister) error { ctx, cancel := context.WithCancel(context.Background()) m.cancel = cancel + // initialize runtime-only tracking of unreadable paths + m.unreadable = make(map[string]struct{}) + if _, err := m.fileMatcher.MatchFiles(); err != nil { m.set.Logger.Warn("finding files", zap.Error(err)) } @@ -175,6 +188,8 @@ func (m *Manager) consume(ctx context.Context, paths []string) { m.telemetryBuilder.FileconsumerOpenFiles.Add(ctx, int64(0-m.tracker.EndConsume())) } +// makeFingerprint opens `path` and computes a fingerprint for the file +// and contains logic to only log file permission errors once per file per startup func (m *Manager) makeFingerprint(path string) (*fingerprint.Fingerprint, *os.File) { // Normalize the path to handle Windows UNC paths correctly normalizedPath, wasCorrupted := normalizePath(path) @@ -183,10 +198,29 @@ func (m *Manager) makeFingerprint(path string) (*fingerprint.Fingerprint, *os.Fi } file, err := os.Open(normalizedPath) // #nosec - operator must read in files defined by user if err != nil { - m.set.Logger.Error("Failed to open file", zap.Error(err), zap.String("original_path", path), zap.String("normalized_path", normalizedPath)) + // If a file is unreadable due to permissions error, store path in map and log error once (unless in debug mode) + if errors.Is(err, fs.ErrPermission) { + _, seen := m.unreadable[path] + if !seen { + // Limit map size to prevent unbounded growth + if len(m.unreadable) < maxUnreadableEntries { + m.unreadable[path] = struct{}{} + } + m.set.Logger.Error("Failed to open file - unreadable", zap.Error(err), zap.String("original_path", path), zap.String("normalized_path", normalizedPath)) + } + } else { + // For non-permission errors, always log + m.set.Logger.Error("Failed to open file", zap.Error(err), zap.String("original_path", path), zap.String("normalized_path", normalizedPath)) + } return nil, nil } + // Notify if previously unreadable file is now able to be read + if _, seen := m.unreadable[path]; seen { + m.set.Logger.Info("Previously unreadable file is now readable", zap.String("path", path)) + delete(m.unreadable, path) + } + fp, err := m.readerFactory.NewFingerprint(file) if err != nil { if err = file.Close(); err != nil { diff --git a/pkg/stanza/fileconsumer/unreadable_test.go b/pkg/stanza/fileconsumer/unreadable_test.go new file mode 100644 index 0000000000000..e5b43d274a26d --- /dev/null +++ b/pkg/stanza/fileconsumer/unreadable_test.go @@ -0,0 +1,165 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package fileconsumer + +import ( + "os" + "runtime" + "testing" + "time" + + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component/componenttest" + "go.uber.org/zap" + "go.uber.org/zap/zapcore" + "go.uber.org/zap/zaptest/observer" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/internal/filetest" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/testutil" +) + +// TestUnreadableFileLoggedOnce verifies that permission-denied errors when +// opening files are logged only once per file per process run, and that an +// informational message is emitted when the file later becomes readable. +func TestUnreadableFileLoggedOnce(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("permission manipulation tests are not reliable on Windows") + } + + t.Parallel() + + tempDir := t.TempDir() + cfg := NewConfig().includeDir(tempDir) + operator, _ := testManager(t, cfg) + + // Create a file and remove permissions so open will fail + f := filetest.OpenTemp(t, tempDir) + _, err := f.WriteString("abc\n") + require.NoError(t, err) + require.NoError(t, f.Close()) + require.NoError(t, os.Chmod(f.Name(), 0)) + + core, obs := observer.New(zapcore.DebugLevel) + logger := zap.New(core) + set := componenttest.NewNopTelemetrySettings() + set.Logger = logger + + operator.set.Logger = set.Logger + require.NoError(t, operator.Start(testutil.NewUnscopedMockPersister())) + defer func() { + require.NoError(t, operator.Stop()) + }() + + // First poll should attempt to open and log an error once + operator.poll(t.Context()) + + // Verify the unreadable map recorded the path and exactly one error was logged + require.Eventually(t, func() bool { + return len(operator.unreadable) == 1 + }, 2*time.Second, 10*time.Millisecond, "expected unreadable map to have one entry after first poll") + + require.Eventually(t, func() bool { + countErrMsgs := 0 + for _, e := range obs.All() { + if e.Level == zapcore.ErrorLevel && e.Message == "Failed to open file - unreadable" { + countErrMsgs++ + } + } + return countErrMsgs == 1 + }, 2*time.Second, 10*time.Millisecond, "expected exactly one 'Failed to open file - unreadable' error after first poll") + + // Second poll should not add another error-level log for the same path + operator.poll(t.Context()) + + require.Eventually(t, func() bool { + countErrMsgs := 0 + for _, e := range obs.All() { + if e.Level == zapcore.ErrorLevel && e.Message == "Failed to open file - unreadable" { + countErrMsgs++ + } + } + return countErrMsgs == 1 + }, 2*time.Second, 10*time.Millisecond, "expected still exactly one 'Failed to open file - unreadable' error after second poll") + + // Verify the unreadable map still contains the entry (no reinitialization) + require.Len(t, operator.unreadable, 1, "expected unreadable map to still have one entry after second poll") + + // Now make the file readable again and poll; should emit an info message + require.NoError(t, os.Chmod(f.Name(), 0o644)) + operator.poll(t.Context()) + + require.Eventually(t, func() bool { + for _, e := range obs.All() { + if e.Level == zapcore.InfoLevel && e.Message == "Previously unreadable file is now readable" { + return true + } + } + return false + }, 2*time.Second, 10*time.Millisecond, "expected at least one info message when file becomes readable") +} + +// TestNonPermissionErrorsAlwaysLogged verifies that errors other than permission +// errors (e.g., file not found) are always logged and not suppressed. +func TestNonPermissionErrorsAlwaysLogged(t *testing.T) { + t.Parallel() + + tempDir := t.TempDir() + cfg := NewConfig().includeDir(tempDir) + operator, _ := testManager(t, cfg) + + core, obs := observer.New(zapcore.DebugLevel) + logger := zap.New(core) + set := componenttest.NewNopTelemetrySettings() + set.Logger = logger + + operator.set.Logger = set.Logger + require.NoError(t, operator.Start(testutil.NewUnscopedMockPersister())) + defer func() { + require.NoError(t, operator.Stop()) + }() + + // Directly call makeFingerprint with a non-existent file path + // This simulates a file being deleted after matching but before opening + nonExistentPath := tempDir + "/non_existent_file.log" + + // First call should log an error + fp, file := operator.makeFingerprint(nonExistentPath) + require.Nil(t, fp) + require.Nil(t, file) + + require.Eventually(t, func() bool { + for _, e := range obs.All() { + if e.Level == zapcore.ErrorLevel && e.Message == "Failed to open file" { + return true + } + } + return false + }, 2*time.Second, 10*time.Millisecond, "expected an error log on first call for non-existent file") + + countBeforeSecondCall := 0 + for _, e := range obs.All() { + if e.Level == zapcore.ErrorLevel && e.Message == "Failed to open file" { + countBeforeSecondCall++ + } + } + + // Second call should also log an error (not suppressed like permission errors) + fp, file = operator.makeFingerprint(nonExistentPath) + require.Nil(t, fp) + require.Nil(t, file) + + require.Eventually(t, func() bool { + countAfterSecondCall := 0 + for _, e := range obs.All() { + if e.Level == zapcore.ErrorLevel && e.Message == "Failed to open file" { + countAfterSecondCall++ + } + } + // Should have at least one more error than before + return countAfterSecondCall > countBeforeSecondCall + }, 2*time.Second, 10*time.Millisecond, "expected another error log on second call for non-permission errors") + + // Verify the unreadable map is empty (non-permission errors shouldn't be tracked) + require.Empty(t, operator.unreadable, "expected unreadable map to be empty for non-permission errors") +}