Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions .chloggen/issue-39491.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: enhancement

# The name of the component, or a single word describing the area of concern, (e.g. receiver/filelog)
component: receiver/filelog

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Suppress repeated permission-denied errors

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [39491]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext: |
Only one error is logged per file per process run, and an informational message is emitted when the file becomes readable again.
This reduces log spam and improves clarity for operators.

# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: [user]
36 changes: 35 additions & 1 deletion pkg/stanza/fileconsumer/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ package fileconsumer // import "github.com/open-telemetry/opentelemetry-collecto

import (
"context"
"errors"
"fmt"
"io/fs"
"os"
"sync"
"time"
Expand All @@ -22,6 +24,12 @@ import (
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator"
)

const (
// maxUnreadableEntries limits the number of paths tracked in the unreadable map
// to prevent memory issues when many files have permission errors.
maxUnreadableEntries = 10000
)

type Manager struct {
set component.TelemetrySettings
wg sync.WaitGroup
Expand All @@ -39,12 +47,17 @@ type Manager struct {
pollsToArchive int

telemetryBuilder *metadata.TelemetryBuilder

unreadable map[string]struct{}
Comment thread
khpeet marked this conversation as resolved.
}

func (m *Manager) Start(persister operator.Persister) error {
ctx, cancel := context.WithCancel(context.Background())
m.cancel = cancel

// initialize runtime-only tracking of unreadable paths
m.unreadable = make(map[string]struct{})

if _, err := m.fileMatcher.MatchFiles(); err != nil {
m.set.Logger.Warn("finding files", zap.Error(err))
}
Expand Down Expand Up @@ -175,6 +188,8 @@ func (m *Manager) consume(ctx context.Context, paths []string) {
m.telemetryBuilder.FileconsumerOpenFiles.Add(ctx, int64(0-m.tracker.EndConsume()))
}

// makeFingerprint opens `path` and computes a fingerprint for the file
// and contains logic to only log file permission errors once per file per startup
func (m *Manager) makeFingerprint(path string) (*fingerprint.Fingerprint, *os.File) {
// Normalize the path to handle Windows UNC paths correctly
normalizedPath, wasCorrupted := normalizePath(path)
Expand All @@ -183,10 +198,29 @@ func (m *Manager) makeFingerprint(path string) (*fingerprint.Fingerprint, *os.Fi
}
file, err := os.Open(normalizedPath) // #nosec - operator must read in files defined by user
if err != nil {
m.set.Logger.Error("Failed to open file", zap.Error(err), zap.String("original_path", path), zap.String("normalized_path", normalizedPath))
// If a file is unreadable due to permissions error, store path in map and log error once (unless in debug mode)
if errors.Is(err, fs.ErrPermission) {
_, seen := m.unreadable[path]
if !seen {
// Limit map size to prevent unbounded growth
if len(m.unreadable) < maxUnreadableEntries {
m.unreadable[path] = struct{}{}
}
m.set.Logger.Error("Failed to open file - unreadable", zap.Error(err), zap.String("original_path", path), zap.String("normalized_path", normalizedPath))
}
} else {
// For non-permission errors, always log
m.set.Logger.Error("Failed to open file", zap.Error(err), zap.String("original_path", path), zap.String("normalized_path", normalizedPath))
}
return nil, nil
}

// Notify if previously unreadable file is now able to be read
if _, seen := m.unreadable[path]; seen {
m.set.Logger.Info("Previously unreadable file is now readable", zap.String("path", path))
delete(m.unreadable, path)
}

fp, err := m.readerFactory.NewFingerprint(file)
if err != nil {
if err = file.Close(); err != nil {
Expand Down
165 changes: 165 additions & 0 deletions pkg/stanza/fileconsumer/unreadable_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

package fileconsumer

import (
"os"
"runtime"
"testing"
"time"

"github.com/stretchr/testify/require"
"go.opentelemetry.io/collector/component/componenttest"
"go.uber.org/zap"
"go.uber.org/zap/zapcore"
"go.uber.org/zap/zaptest/observer"

"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/internal/filetest"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/testutil"
)

// TestUnreadableFileLoggedOnce verifies that permission-denied errors when
// opening files are logged only once per file per process run, and that an
// informational message is emitted when the file later becomes readable.
func TestUnreadableFileLoggedOnce(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("permission manipulation tests are not reliable on Windows")
}

t.Parallel()

tempDir := t.TempDir()
cfg := NewConfig().includeDir(tempDir)
operator, _ := testManager(t, cfg)

// Create a file and remove permissions so open will fail
f := filetest.OpenTemp(t, tempDir)
_, err := f.WriteString("abc\n")
require.NoError(t, err)
require.NoError(t, f.Close())
require.NoError(t, os.Chmod(f.Name(), 0))

core, obs := observer.New(zapcore.DebugLevel)
logger := zap.New(core)
set := componenttest.NewNopTelemetrySettings()
set.Logger = logger

operator.set.Logger = set.Logger
require.NoError(t, operator.Start(testutil.NewUnscopedMockPersister()))
defer func() {
require.NoError(t, operator.Stop())
}()

// First poll should attempt to open and log an error once
operator.poll(t.Context())

// Verify the unreadable map recorded the path and exactly one error was logged
require.Eventually(t, func() bool {
return len(operator.unreadable) == 1
}, 2*time.Second, 10*time.Millisecond, "expected unreadable map to have one entry after first poll")

require.Eventually(t, func() bool {
countErrMsgs := 0
for _, e := range obs.All() {
if e.Level == zapcore.ErrorLevel && e.Message == "Failed to open file - unreadable" {
countErrMsgs++
}
}
return countErrMsgs == 1
}, 2*time.Second, 10*time.Millisecond, "expected exactly one 'Failed to open file - unreadable' error after first poll")

// Second poll should not add another error-level log for the same path
operator.poll(t.Context())

require.Eventually(t, func() bool {
countErrMsgs := 0
for _, e := range obs.All() {
if e.Level == zapcore.ErrorLevel && e.Message == "Failed to open file - unreadable" {
countErrMsgs++
}
}
return countErrMsgs == 1
}, 2*time.Second, 10*time.Millisecond, "expected still exactly one 'Failed to open file - unreadable' error after second poll")

// Verify the unreadable map still contains the entry (no reinitialization)
require.Len(t, operator.unreadable, 1, "expected unreadable map to still have one entry after second poll")

// Now make the file readable again and poll; should emit an info message
require.NoError(t, os.Chmod(f.Name(), 0o644))
operator.poll(t.Context())

require.Eventually(t, func() bool {
for _, e := range obs.All() {
if e.Level == zapcore.InfoLevel && e.Message == "Previously unreadable file is now readable" {
return true
}
}
return false
}, 2*time.Second, 10*time.Millisecond, "expected at least one info message when file becomes readable")
}

// TestNonPermissionErrorsAlwaysLogged verifies that errors other than permission
// errors (e.g., file not found) are always logged and not suppressed.
func TestNonPermissionErrorsAlwaysLogged(t *testing.T) {
t.Parallel()

tempDir := t.TempDir()
cfg := NewConfig().includeDir(tempDir)
operator, _ := testManager(t, cfg)

core, obs := observer.New(zapcore.DebugLevel)
logger := zap.New(core)
set := componenttest.NewNopTelemetrySettings()
set.Logger = logger

operator.set.Logger = set.Logger
require.NoError(t, operator.Start(testutil.NewUnscopedMockPersister()))
defer func() {
require.NoError(t, operator.Stop())
}()

// Directly call makeFingerprint with a non-existent file path
// This simulates a file being deleted after matching but before opening
nonExistentPath := tempDir + "/non_existent_file.log"

// First call should log an error
fp, file := operator.makeFingerprint(nonExistentPath)
require.Nil(t, fp)
require.Nil(t, file)

require.Eventually(t, func() bool {
for _, e := range obs.All() {
if e.Level == zapcore.ErrorLevel && e.Message == "Failed to open file" {
return true
}
}
return false
}, 2*time.Second, 10*time.Millisecond, "expected an error log on first call for non-existent file")

countBeforeSecondCall := 0
for _, e := range obs.All() {
if e.Level == zapcore.ErrorLevel && e.Message == "Failed to open file" {
countBeforeSecondCall++
}
}

// Second call should also log an error (not suppressed like permission errors)
fp, file = operator.makeFingerprint(nonExistentPath)
require.Nil(t, fp)
require.Nil(t, file)

require.Eventually(t, func() bool {
countAfterSecondCall := 0
for _, e := range obs.All() {
if e.Level == zapcore.ErrorLevel && e.Message == "Failed to open file" {
countAfterSecondCall++
}
}
// Should have at least one more error than before
return countAfterSecondCall > countBeforeSecondCall
}, 2*time.Second, 10*time.Millisecond, "expected another error log on second call for non-permission errors")

// Verify the unreadable map is empty (non-permission errors shouldn't be tracked)
require.Empty(t, operator.unreadable, "expected unreadable map to be empty for non-permission errors")
}