diff --git a/.chloggen/filestorage-panic.yaml b/.chloggen/filestorage-panic.yaml new file mode 100644 index 0000000000000..5575216bf5d32 --- /dev/null +++ b/.chloggen/filestorage-panic.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: bug_fix + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: filestorageextension + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Add an option to recreate the database if the database file is corrupted. + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [35899] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [] diff --git a/extension/storage/filestorage/README.md b/extension/storage/filestorage/README.md index cea3a8c97bbc8..eab8faa805d5c 100644 --- a/extension/storage/filestorage/README.md +++ b/extension/storage/filestorage/README.md @@ -31,6 +31,10 @@ The default timeout is `1s`. By default, the directories will be created with `0750 (rwxr-x---)` permissions, minus the process umask. 
Use `directory_permissions` to customize directory creation permissions, minus the process umask. +`recreate` when set, will rename the existing data storage to `{filename}.backup` and a new data file will be created from scratch. This option is useful if the underlying database is corrupted, since a corrupted database can halt the entire collector process due to a panic. See [#36840](https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/36840) for more details. + +> [!Note] +> Enabling `recreate` will regenerate the database files, which may lead to data duplication or data loss. ## Compaction `compaction` defines how and when files should be compacted. There are two modes of compaction available (both of which can be set concurrently): diff --git a/extension/storage/filestorage/config.go b/extension/storage/filestorage/config.go index ef842a93b6163..c988a0eb957ab 100644 --- a/extension/storage/filestorage/config.go +++ b/extension/storage/filestorage/config.go @@ -31,6 +31,8 @@ type Config struct { CreateDirectory bool `mapstructure:"create_directory,omitempty"` DirectoryPermissions string `mapstructure:"directory_permissions,omitempty"` directoryPermissionsParsed int64 `mapstructure:"-,omitempty"` + + Recreate bool `mapstructure:"recreate,omitempty"` } // CompactionConfig defines configuration for optional file storage compaction. diff --git a/extension/storage/filestorage/extension.go b/extension/storage/filestorage/extension.go index ff8ec568de83e..a2be454b49a06 100644 --- a/extension/storage/filestorage/extension.go +++ b/extension/storage/filestorage/extension.go @@ -71,6 +71,11 @@ func (lfs *localFileStorage) GetClient(_ context.Context, kind component.Kind, e rawName = sanitize(rawName) absoluteName := filepath.Join(lfs.cfg.Directory, rawName) + if lfs.cfg.Recreate { + if err := os.Rename(absoluteName, absoluteName+".backup"); err != nil && !os.IsNotExist(err) { // a missing file just means there is nothing to back up yet + return nil, fmt.Errorf("error renaming the database. 
Please remove %s manually: %w", absoluteName, err) + } + } client, err := newClient(lfs.logger, absoluteName, lfs.cfg.Timeout, lfs.cfg.Compaction, !lfs.cfg.FSync) if err != nil { return nil, err diff --git a/extension/storage/filestorage/extension_test.go b/extension/storage/filestorage/extension_test.go index cc936b6b1e247..fd7b840ff11dd 100644 --- a/extension/storage/filestorage/extension_test.go +++ b/extension/storage/filestorage/extension_test.go @@ -612,3 +612,81 @@ func TestDirectoryCreation(t *testing.T) { }) } } + +func TestRecreate(t *testing.T) { + ctx := context.Background() + temp := t.TempDir() + f := NewFactory() + + config := f.CreateDefaultConfig().(*Config) + config.Directory = temp + + // step 1: create an extension with the default config and write some data + { + ext, err := f.Create(ctx, extensiontest.NewNopSettings(f.Type()), config) + require.NoError(t, err) + require.NotNil(t, ext) + + se, ok := ext.(storage.Extension) + require.True(t, ok) + + client, err := se.GetClient(ctx, component.KindReceiver, component.MustNewID("filelog"), "") + require.NoError(t, err) + require.NotNil(t, client) + + // write the data and make sure it is returned by the subsequent get. + require.NoError(t, client.Set(ctx, "key", []byte("val"))) + val, err := client.Get(ctx, "key") + require.NoError(t, err) + require.Equal(t, []byte("val"), val) + + // close the extension + require.NoError(t, client.Close(ctx)) + require.NoError(t, ext.Shutdown(ctx)) + } + + // step 2: re-create the extension with the same config to make sure that the data is still there + { + ext, err := f.Create(ctx, extensiontest.NewNopSettings(f.Type()), config) + require.NoError(t, err) + require.NotNil(t, ext) + se, ok := ext.(storage.Extension) + require.True(t, ok) + + client, err := se.GetClient(ctx, component.KindReceiver, component.MustNewID("filelog"), "") + require.NoError(t, err) + require.NotNil(t, client) + + // make sure that the data exists from the previous pass. 
+ val, err := client.Get(ctx, "key") + require.NoError(t, err) + require.Equal(t, []byte("val"), val) + + // close the extension + require.NoError(t, client.Close(ctx)) + require.NoError(t, ext.Shutdown(ctx)) + } + + // step 3: re-create the extension, but with Recreate=true (GetClient renames the existing file to a .backup copy) and make sure that the data is not present + { + config.Recreate = true + ext, err := f.Create(ctx, extensiontest.NewNopSettings(f.Type()), config) + require.NoError(t, err) + require.NotNil(t, ext) + se, ok := ext.(storage.Extension) + require.True(t, ok) + + client, err := se.GetClient(ctx, component.KindReceiver, component.MustNewID("filelog"), "") + require.NoError(t, err) + require.NotNil(t, client) + + // The data shouldn't exist in the freshly created database. + val, err := client.Get(ctx, "key") + require.NoError(t, err) + require.Nil(t, val) + + // close the extension + require.NoError(t, client.Close(ctx)) + require.NoError(t, ext.Shutdown(ctx)) + } +}