From 3cfe2be5316c43e33375f7e4e5bebc6f880ed347 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Tue, 24 Jan 2023 11:42:31 -0500 Subject: [PATCH 01/35] add additional catalog indexes for performance Signed-off-by: Alex Goodman --- go.mod | 4 + go.sum | 2 + pkg/file/tarutil_test.go | 2 +- pkg/image/content_helpers.go | 54 +- pkg/image/file_catalog.go | 142 +++- pkg/image/file_catalog_test.go | 792 ++++++++++++++++-- pkg/image/image.go | 17 +- pkg/image/layer.go | 57 +- .../test-fixtures/generators/fixture-2.sh | 52 ++ 9 files changed, 1007 insertions(+), 115 deletions(-) create mode 100755 pkg/image/test-fixtures/generators/fixture-2.sh diff --git a/go.mod b/go.mod index eeae65c5..1aafe9ab 100644 --- a/go.mod +++ b/go.mod @@ -7,12 +7,14 @@ require ( github.com/anchore/go-logger v0.0.0-20220728155337-03b66a5207d8 github.com/anchore/go-testutils v0.0.0-20200925183923-d5f45b0d3c04 github.com/awslabs/amazon-ecr-credential-helper/ecr-login v0.0.0-20220517224237-e6f29200ae04 + github.com/becheran/wildmatch-go v1.0.0 github.com/bmatcuk/doublestar/v4 v4.0.2 github.com/containerd/containerd v1.6.12 github.com/docker/cli v20.10.12+incompatible github.com/docker/docker v20.10.12+incompatible github.com/gabriel-vasile/mimetype v1.4.0 github.com/go-test/deep v1.0.8 + github.com/google/go-cmp v0.5.6 github.com/google/go-containerregistry v0.7.0 github.com/hashicorp/go-multierror v1.1.1 github.com/logrusorgru/aurora v0.0.0-20200102142835-e9ef32dff381 @@ -82,3 +84,5 @@ require ( google.golang.org/protobuf v1.28.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) + +require golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect diff --git a/go.sum b/go.sum index 18aa55bd..365ddab7 100644 --- a/go.sum +++ b/go.sum @@ -130,6 +130,8 @@ github.com/aws/smithy-go v1.6.0 h1:T6puApfBcYiTIsaI+SYWqanjMt5pc3aoyyDrI+0YH54= github.com/aws/smithy-go v1.6.0/go.mod h1:SObp3lf9smib00L/v3U2eAKG8FyQ7iLrJnQiAmR5n+E= github.com/awslabs/amazon-ecr-credential-helper/ecr-login 
v0.0.0-20220517224237-e6f29200ae04 h1:p2I85zYI9z5/c/3Q0LiO3RtNXcmXHTtJfml/hV16zNg= github.com/awslabs/amazon-ecr-credential-helper/ecr-login v0.0.0-20220517224237-e6f29200ae04/go.mod h1:Z+bXnIbhKJYSvxNwsNnwde7pDKxuqlEZCbUBoTwAqf0= +github.com/becheran/wildmatch-go v1.0.0 h1:mE3dGGkTmpKtT4Z+88t8RStG40yN9T+kFEGj2PZFSzA= +github.com/becheran/wildmatch-go v1.0.0/go.mod h1:gbMvj0NtVdJ15Mg/mH9uxk2R1QCistMyU7d9KFzroX4= github.com/beorn7/perks v0.0.0-20160804104726-4c0e84591b9a/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= diff --git a/pkg/file/tarutil_test.go b/pkg/file/tarutil_test.go index cd67f23e..4c93fa58 100644 --- a/pkg/file/tarutil_test.go +++ b/pkg/file/tarutil_test.go @@ -95,7 +95,7 @@ func TestMetadataFromTar(t *testing.T) { } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - f := getTarFixture(t, "fixture-1") + f := getTarFixture(t, test.fixture) metadata, err := MetadataFromTar(f, test.name) assert.NoError(t, err) assert.Equal(t, test.expected, metadata) diff --git a/pkg/image/content_helpers.go b/pkg/image/content_helpers.go index 50709085..01ac77ad 100644 --- a/pkg/image/content_helpers.go +++ b/pkg/image/content_helpers.go @@ -31,20 +31,64 @@ func fetchFileContentsByPath(ft *filetree.FileTree, fileCatalog *FileCatalog, pa func fetchFilesByMIMEType(ft *filetree.FileTree, fileCatalog *FileCatalog, mType string) ([]file.Reference, error) { fileEntries, err := fileCatalog.GetByMIMEType(mType) if err != nil { - return nil, fmt.Errorf("unable to fetch file references by MIME type: %w", err) + return nil, fmt.Errorf("unable to fetch file references by MIME type (%q): %w", mType, err) } + // since this query is related to the contents of the path, this should be a strict file ID match + return filterCatalogFilesRelativesToTree(ft, 
fileEntries, true, filetree.FollowBasenameLinks) +} + +// fetchFilesByExtension is a common helper function for resolving file references for a file extension from the file +// catalog relative to the given tree. +func fetchFilesByExtension(ft *filetree.FileTree, fileCatalog *FileCatalog, extension string) ([]file.Reference, error) { + fileEntries, err := fileCatalog.GetByExtension(extension) + if err != nil { + return nil, fmt.Errorf("unable to fetch file references by extension (%q): %w", extension, err) + } + + return filterCatalogFilesRelativesToTree(ft, fileEntries, false, filetree.FollowBasenameLinks) +} + +// fetchFilesByBasename is a common helper function for resolving file references for a file basename +// catalog relative to the given tree. +func fetchFilesByBasename(ft *filetree.FileTree, fileCatalog *FileCatalog, basename string) ([]file.Reference, error) { + fileEntries, err := fileCatalog.GetByBasename(basename) + if err != nil { + return nil, fmt.Errorf("unable to fetch file references by basename (%q): %w", basename, err) + } + + return filterCatalogFilesRelativesToTree(ft, fileEntries, false, filetree.FollowBasenameLinks) +} + +// fetchFilesByBasenameGlob is a common helper function for resolving file references for a file basename glob pattern +// catalog relative to the given tree. 
+func fetchFilesByBasenameGlob(ft *filetree.FileTree, fileCatalog *FileCatalog, basenameGlob string) ([]file.Reference, error) { + fileEntries, err := fileCatalog.GetByBasenameGlob(basenameGlob) + if err != nil { + return nil, fmt.Errorf("unable to fetch file references by basename glob (%q): %w", basenameGlob, err) + } + + return filterCatalogFilesRelativesToTree(ft, fileEntries, false, filetree.FollowBasenameLinks) +} + +func filterCatalogFilesRelativesToTree(ft *filetree.FileTree, fileEntries []FileCatalogEntry, strictFileID bool, linkResolutionOpts ...filetree.LinkResolutionOption) ([]file.Reference, error) { var refs []file.Reference for _, entry := range fileEntries { - _, ref, err := ft.File(entry.File.RealPath, filetree.FollowBasenameLinks) + _, ref, err := ft.File(entry.File.RealPath, linkResolutionOpts...) if err != nil { return nil, fmt.Errorf("unable to get ref for path=%q: %w", entry.File.RealPath, err) } - // we know this entry exists in the tree, keep track of the reference for this file - if ref != nil && ref.ID() == entry.File.ID() { - refs = append(refs, *ref) + if ref == nil { + continue + } + + if strictFileID && ref.ID() != entry.File.ID() { + continue } + + // we know this entry exists in the tree, keep track of the reference for this file + refs = append(refs, *ref) } return refs, nil } diff --git a/pkg/image/file_catalog.go b/pkg/image/file_catalog.go index 3f62ed97..4ef8d61b 100644 --- a/pkg/image/file_catalog.go +++ b/pkg/image/file_catalog.go @@ -3,8 +3,12 @@ package image import ( "fmt" "io" + "path" + "strings" "sync" + "github.com/becheran/wildmatch-go" + "github.com/anchore/stereoscope/pkg/file" ) @@ -14,8 +18,11 @@ var ErrFileNotFound = fmt.Errorf("could not find file") // blobs (i.e. everything except for the image index/manifest/metadata files). 
type FileCatalog struct { sync.RWMutex - catalog map[file.ID]FileCatalogEntry - byMIMEType map[string][]file.ID + catalog map[file.ID]FileCatalogEntry + byMIMEType map[string][]file.ID + byExtension map[string][]file.ID + byBasename map[string][]file.ID + basenames []string } // FileCatalogEntry represents all stored metadata for a single file reference. @@ -29,8 +36,10 @@ type FileCatalogEntry struct { // NewFileCatalog returns an empty FileCatalog. func NewFileCatalog() FileCatalog { return FileCatalog{ - catalog: make(map[file.ID]FileCatalogEntry), - byMIMEType: make(map[string][]file.ID), + catalog: make(map[file.ID]FileCatalogEntry), + byMIMEType: make(map[string][]file.ID), + byExtension: make(map[string][]file.ID), + byBasename: make(map[string][]file.ID), } } @@ -39,12 +48,23 @@ func NewFileCatalog() FileCatalog { func (c *FileCatalog) Add(f file.Reference, m file.Metadata, l *Layer, opener file.Opener) { c.Lock() defer c.Unlock() + id := f.ID() + if m.MIMEType != "" { // an empty MIME type means that we didn't have the contents of the file to determine the MIME type. If we have // the contents and the MIME type could not be determined then the default value is application/octet-stream. 
- c.byMIMEType[m.MIMEType] = append(c.byMIMEType[m.MIMEType], f.ID()) + c.byMIMEType[m.MIMEType] = append(c.byMIMEType[m.MIMEType], id) + } + + basename := path.Base(string(f.RealPath)) + c.byBasename[basename] = append(c.byBasename[basename], id) + c.basenames = append(c.basenames, basename) + + for _, ext := range fileExtensions(string(f.RealPath)) { + c.byExtension[ext] = append(c.byExtension[ext], id) } - c.catalog[f.ID()] = FileCatalogEntry{ + + c.catalog[id] = FileCatalogEntry{ File: f, Metadata: m, Layer: l, @@ -72,18 +92,73 @@ func (c *FileCatalog) Get(f file.Reference) (FileCatalogEntry, error) { return value, nil } +func (c *FileCatalog) Basenames() []string { + c.RLock() + defer c.RUnlock() + + return c.basenames +} + func (c *FileCatalog) GetByMIMEType(mType string) ([]FileCatalogEntry, error) { c.RLock() defer c.RUnlock() + fileIDs, ok := c.byMIMEType[mType] if !ok { return nil, nil } + + var entries []FileCatalogEntry + for _, id := range fileIDs { + entry, ok := c.catalog[id] + if !ok { + return nil, ErrFileNotFound + } + entries = append(entries, entry) + } + + return entries, nil +} + +func (c *FileCatalog) GetByExtension(extension string) ([]FileCatalogEntry, error) { + c.RLock() + defer c.RUnlock() + + fileIDs, ok := c.byExtension[extension] + if !ok { + return nil, nil + } + + var entries []FileCatalogEntry + for _, id := range fileIDs { + entry, ok := c.catalog[id] + if !ok { + return nil, ErrFileNotFound + } + entries = append(entries, entry) + } + + return entries, nil +} + +func (c *FileCatalog) GetByBasename(basename string) ([]FileCatalogEntry, error) { + c.RLock() + defer c.RUnlock() + + if strings.Contains(basename, "/") { + return nil, fmt.Errorf("found directory separator in a basename") + } + + fileIDs, ok := c.byBasename[basename] + if !ok { + return nil, nil + } + var entries []FileCatalogEntry for _, id := range fileIDs { entry, ok := c.catalog[id] if !ok { - return nil, fmt.Errorf("could not find file: %+v", id) + return nil, 
ErrFileNotFound } entries = append(entries, entry) } @@ -91,7 +166,34 @@ func (c *FileCatalog) GetByMIMEType(mType string) ([]FileCatalogEntry, error) { return entries, nil } -// FetchContents reads the file contents for the given file reference from the underlying image/layer blob. An error +func (c *FileCatalog) GetByBasenameGlob(glob string) ([]FileCatalogEntry, error) { + c.RLock() + defer c.RUnlock() + + if strings.Contains(glob, "**") { + return nil, fmt.Errorf("basename glob patterns with '**' are not supported") + } + if strings.Contains(glob, "/") { + return nil, fmt.Errorf("found directory separator in a basename") + } + + patternObj := wildmatch.NewWildMatch(glob) + + var fileEntries []FileCatalogEntry + for _, b := range c.Basenames() { + if patternObj.IsMatch(b) { + bns, err := c.GetByBasename(b) + if err != nil { + return nil, fmt.Errorf("unable to fetch file references by basename (%q): %w", b, err) + } + fileEntries = append(fileEntries, bns...) + } + } + + return fileEntries, nil +} + +// FileContents reads the file contents for the given file reference from the underlying image/layer blob. An error // is returned if there is no file at the given path and layer or the read operation cannot continue. func (c *FileCatalog) FileContents(f file.Reference) (io.ReadCloser, error) { c.RLock() @@ -107,3 +209,27 @@ func (c *FileCatalog) FileContents(f file.Reference) (io.ReadCloser, error) { return catalogEntry.Contents(), nil } + +func fileExtensions(p string) []string { + var exts []string + p = strings.TrimSpace(p) + + // ignore oddities + if strings.HasSuffix(p, ".") { + return exts + } + + // ignore directories + if strings.HasSuffix(p, "/") { + return exts + } + + // ignore . which indicate a hidden file + p = strings.TrimLeft(path.Base(p), ".") + for i := len(p) - 1; i >= 0; i-- { + if p[i] == '.' 
{ + exts = append(exts, p[i:]) + } + } + return exts +} diff --git a/pkg/image/file_catalog_test.go b/pkg/image/file_catalog_test.go index b7211a7b..a22614d1 100644 --- a/pkg/image/file_catalog_test.go +++ b/pkg/image/file_catalog_test.go @@ -6,6 +6,11 @@ package image import ( "crypto/sha256" "fmt" + "github.com/anchore/stereoscope/pkg/filetree" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "io" "os" "os/exec" @@ -29,80 +34,6 @@ var ( tarCachePath = path.Join(fixturesPath, "tar-cache") ) -func getTarFixture(t *testing.T, name string) (*os.File, func()) { - generatorScriptName := name + ".sh" - generatorScriptPath := path.Join(fixturesGeneratorsPath, generatorScriptName) - if !fileExists(t, generatorScriptPath) { - t.Fatalf("no tar generator script for fixture '%s'", generatorScriptPath) - } - - version := fixtureVersion(t, generatorScriptPath) - tarName := name + ":" + version + ".tar" - tarFixturePath := path.Join(tarCachePath, tarName) - - if !fileExists(t, tarFixturePath) { - t.Logf("Creating tar fixture: %s", tarFixturePath) - - fullPath, err := filepath.Abs(tarFixturePath) - if err != nil { - t.Fatal(err) - } - - cmd := exec.Command("./"+generatorScriptName, fullPath) - cmd.Env = os.Environ() - cmd.Dir = fixturesGeneratorsPath - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - cmd.Stdin = os.Stdin - - err = cmd.Run() - if err != nil { - panic(err) - } - } - - fh, err := os.Open(tarFixturePath) - if err != nil { - t.Fatalf("could not open tar fixture '%s'", tarFixturePath) - } - - return fh, func() { - fh.Close() - } -} - -func fixtureVersion(t *testing.T, path string) string { - t.Helper() - f, err := os.Open(path) - if err != nil { - t.Fatal(err) - } - defer func() { - err := f.Close() - if err != nil { - t.Fatal(err) - } - }() - - h := sha256.New() - if _, err := io.Copy(h, f); err != nil { - t.Fatal(err) - } - - return fmt.Sprintf("%x", h.Sum(nil)) 
-} - -func fileExists(t *testing.T, filename string) bool { - t.Helper() - info, err := os.Stat(filename) - if os.IsNotExist(err) { - return false - } else if err != nil { - t.Fatal(err) - } - return !info.IsDir() -} - func TestFileCatalog_Add(t *testing.T) { ref := file.NewFileReference("/somepath") @@ -178,8 +109,7 @@ func (t *testLayerContent) MediaType() (types.MediaType, error) { } func TestFileCatalog_FileContents(t *testing.T) { - fixtureFile, cleanup := getTarFixture(t, "fixture-1") - defer cleanup() + fixtureFile := getTarFixture(t, "fixture-1") // a real path & contents from the fixture p := "path/branch/one/file-1.txt" @@ -192,22 +122,17 @@ func TestFileCatalog_FileContents(t *testing.T) { } tr, err := file.NewTarIndex(fixtureFile.Name(), nil) - if err != nil { - t.Fatalf("unable to get indexed reader") - } + require.NoError(t, err) + layer := &Layer{ layer: &testLayerContent{}, indexedContent: tr, } entries, err := tr.EntriesByName(p) - if err != nil { - t.Fatalf("unable to get entryies: %+v", err) - } + require.NoError(t, err) - if len(entries) != 1 { - t.Fatalf("bad entries len: %d", len(entries)) - } + require.Len(t, entries, 1) opener := func() io.ReadCloser { return io.NopCloser(entries[0].Reader) @@ -217,16 +142,703 @@ func TestFileCatalog_FileContents(t *testing.T) { catalog.Add(*ref, metadata, layer, opener) reader, err := catalog.FileContents(*ref) + require.NoError(t, err) + + actual, err := io.ReadAll(reader) + require.NoError(t, err) + + for _, d := range deep.Equal([]byte(expected), actual) { + t.Errorf("diff: %+v", d) + } +} + +func Test_fileExtensions(t *testing.T) { + tests := []struct { + name string + path string + want []string + }{ + { + name: "empty", + path: "", + }, + { + name: "directory", + path: "/somewhere/to/nowhere/", + }, + { + name: "directory with ext", + path: "/somewhere/to/nowhere.d/", + }, + { + name: "single extension", + path: "/somewhere/to/my.tar", + want: []string{".tar"}, + }, + { + name: "multiple extensions", 
+ path: "/somewhere/to/my.tar.gz", + want: []string{".gz", ".tar.gz"}, + }, + { + name: "ignore . prefix", + path: "/somewhere/to/.my.tar.gz", + want: []string{".gz", ".tar.gz"}, + }, + { + name: "ignore more . prefixes", + path: "/somewhere/to/...my.tar.gz", + want: []string{".gz", ".tar.gz"}, + }, + { + name: "ignore . suffixes", + path: "/somewhere/to/my.tar.gz...", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, fileExtensions(tt.path)) + }) + } +} + +func TestFileCatalog_GetByExtension(t *testing.T) { + fixtureTarFile := getTarFixture(t, "fixture-2") + + ft := filetree.NewFileTree() + fileCatalog := NewFileCatalog() + var size int64 + + // we don't need the index itself, just the side effect on the file catalog after indexing + _, err := file.NewTarIndex( + fixtureTarFile.Name(), + layerTarIndexer(ft, &fileCatalog, &size, nil, nil), + ) + require.NoError(t, err) + + tests := []struct { + name string + input string + want []FileCatalogEntry + wantErr require.ErrorAssertionFunc + }{ + { + name: "get simple extension", + input: ".txt", + want: []FileCatalogEntry{ + { + File: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-1.txt", + TarHeaderName: "path/branch.d/one/file-1.txt", + TypeFlag: 48, + MIMEType: "text/plain", + }, + }, + { + File: file.Reference{RealPath: "/path/branch.d/two/file-2.txt"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/two/file-2.txt", + TarHeaderName: "path/branch.d/two/file-2.txt", + TypeFlag: 48, + MIMEType: "text/plain", + }, + }, + { + File: file.Reference{RealPath: "/path/file-3.txt"}, + Metadata: file.Metadata{ + Path: "/path/file-3.txt", + TarHeaderName: "path/file-3.txt", + TypeFlag: 48, + MIMEType: "text/plain", + }, + }, + }, + }, + { + name: "get mixed type extension", + input: ".d", + want: []FileCatalogEntry{ + { + File: file.Reference{RealPath: "/path/branch.d"}, + Metadata: file.Metadata{ + 
Path: "/path/branch.d", + TarHeaderName: "path/branch.d/", + TypeFlag: 53, + IsDir: true, + }, + }, + { + File: file.Reference{RealPath: "/path/branch.d/one/file-4.d"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-4.d", + TarHeaderName: "path/branch.d/one/file-4.d", + TypeFlag: 48, // regular file + MIMEType: "text/plain", + }, + }, + { + File: file.Reference{RealPath: "/path/common/branch.d"}, + Metadata: file.Metadata{ + Path: "/path/common/branch.d", + TarHeaderName: "path/common/branch.d", + Linkname: "path/branch.d", + TypeFlag: 50, // symlink + }, + }, + { + File: file.Reference{RealPath: "/path/common/file-1.d"}, + Metadata: file.Metadata{ + Path: "/path/common/file-1.d", + TarHeaderName: "path/common/file-1.d", + Linkname: "path/branch.d/one/file-1.txt", + TypeFlag: 50, // symlink + }, + }, + }, + }, + { + name: "get long extension", + input: ".tar.gz", + want: []FileCatalogEntry{ + { + File: file.Reference{RealPath: "/path/branch.d/one/.file-4.tar.gz"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/.file-4.tar.gz", + TarHeaderName: "path/branch.d/one/.file-4.tar.gz", + TypeFlag: 48, + MIMEType: "text/plain", + }, + }, + { + File: file.Reference{RealPath: "/path/branch.d/one/file-4.tar.gz"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-4.tar.gz", + TarHeaderName: "path/branch.d/one/file-4.tar.gz", + TypeFlag: 48, + MIMEType: "text/plain", + }, + }, + }, + }, + { + name: "get short extension", + input: ".gz", + want: []FileCatalogEntry{ + { + File: file.Reference{RealPath: "/path/branch.d/one/.file-4.tar.gz"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/.file-4.tar.gz", + TarHeaderName: "path/branch.d/one/.file-4.tar.gz", + TypeFlag: 48, + MIMEType: "text/plain", + }, + }, + { + File: file.Reference{RealPath: "/path/branch.d/one/file-4.tar.gz"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-4.tar.gz", + TarHeaderName: "path/branch.d/one/file-4.tar.gz", + TypeFlag: 48, + MIMEType: "text/plain", + 
}, + }, + }, + }, + { + name: "get non-existent extension", + input: ".blerg-123", + want: []FileCatalogEntry{}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = require.NoError + } + actual, err := fileCatalog.GetByExtension(tt.input) + tt.wantErr(t, err) + if err != nil { + return + } + if d := cmp.Diff(tt.want, actual, + cmpopts.EquateEmpty(), + cmpopts.IgnoreUnexported(file.Reference{}), + cmpopts.IgnoreFields(file.Metadata{}, "Mode", "GroupID", "UserID", "Size", "TarSequence"), + cmpopts.IgnoreFields(FileCatalogEntry{}, "Contents")); d != "" { + t.Errorf("diff: %s", d) + } + }) + } +} + +func TestFileCatalog_GetByBasename(t *testing.T) { + fixtureTarFile := getTarFixture(t, "fixture-2") + + ft := filetree.NewFileTree() + fileCatalog := NewFileCatalog() + var size int64 + + // we don't need the index itself, just the side effect on the file catalog after indexing + _, err := file.NewTarIndex( + fixtureTarFile.Name(), + layerTarIndexer(ft, &fileCatalog, &size, nil, nil), + ) + require.NoError(t, err) + + tests := []struct { + name string + input string + want []FileCatalogEntry + wantErr require.ErrorAssertionFunc + }{ + { + name: "get existing file name", + input: "file-1.txt", + want: []FileCatalogEntry{ + { + File: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-1.txt", + TarHeaderName: "path/branch.d/one/file-1.txt", + TypeFlag: 48, + MIMEType: "text/plain", + }, + }, + }, + }, + { + name: "get non-existing name", + input: "file-11.txt", + want: []FileCatalogEntry{}, + }, + { + name: "get directory name", + input: "branch.d", + want: []FileCatalogEntry{ + { + File: file.Reference{RealPath: "/path/branch.d"}, + Metadata: file.Metadata{ + Path: "/path/branch.d", + TarHeaderName: "path/branch.d/", + TypeFlag: 53, + IsDir: true, + }, + }, + { + File: file.Reference{RealPath: "/path/common/branch.d"}, + Metadata: file.Metadata{ 
+ Path: "/path/common/branch.d", + TarHeaderName: "path/common/branch.d", + Linkname: "path/branch.d", + TypeFlag: 50, // symlink + }, + }, + }, + }, + { + name: "get symlink name", + input: "file-1.d", + want: []FileCatalogEntry{ + { + File: file.Reference{RealPath: "/path/common/file-1.d"}, + Metadata: file.Metadata{ + Path: "/path/common/file-1.d", + TarHeaderName: "path/common/file-1.d", + Linkname: "path/branch.d/one/file-1.txt", + TypeFlag: 50, // symlink + }, + }, + }, + }, + { + name: "get basename with path expression", + input: "somewhere/file-1.d", + wantErr: require.Error, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = require.NoError + } + actual, err := fileCatalog.GetByBasename(tt.input) + tt.wantErr(t, err) + if err != nil { + return + } + if d := cmp.Diff(tt.want, actual, + cmpopts.EquateEmpty(), + cmpopts.IgnoreUnexported(file.Reference{}), + cmpopts.IgnoreFields(file.Metadata{}, "Mode", "GroupID", "UserID", "Size", "TarSequence"), + cmpopts.IgnoreFields(FileCatalogEntry{}, "Contents")); d != "" { + t.Errorf("diff: %s", d) + } + }) + } +} + +func TestFileCatalog_GetByBasenameGlob(t *testing.T) { + fixtureTarFile := getTarFixture(t, "fixture-2") + + ft := filetree.NewFileTree() + fileCatalog := NewFileCatalog() + var size int64 + + // we don't need the index itself, just the side effect on the file catalog after indexing + _, err := file.NewTarIndex( + fixtureTarFile.Name(), + layerTarIndexer(ft, &fileCatalog, &size, nil, nil), + ) + require.NoError(t, err) + + tests := []struct { + name string + input string + want []FileCatalogEntry + wantErr require.ErrorAssertionFunc + }{ + { + name: "get existing file name", + input: "file-1.*", + want: []FileCatalogEntry{ + { + File: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-1.txt", + TarHeaderName: "path/branch.d/one/file-1.txt", + TypeFlag: 48, + MIMEType: 
"text/plain", + }, + }, + { + File: file.Reference{RealPath: "/path/common/file-1.d"}, + Metadata: file.Metadata{ + Path: "/path/common/file-1.d", + TarHeaderName: "path/common/file-1.d", + Linkname: "path/branch.d/one/file-1.txt", + TypeFlag: 50, + }, + }, + }, + }, + { + name: "get non-existing name", + input: "blerg-*.txt", + want: []FileCatalogEntry{}, + }, + { + name: "get directory name", + input: "bran*.d", + want: []FileCatalogEntry{ + // below is the unique behavior to this function... + { + File: file.Reference{RealPath: "/path/branch.d"}, + Metadata: file.Metadata{ + Path: "/path/branch.d", + TarHeaderName: "path/branch.d/", + TypeFlag: 53, + IsDir: true, + }, + }, + { + File: file.Reference{RealPath: "/path/common/branch.d"}, + Metadata: file.Metadata{ + Path: "/path/common/branch.d", + TarHeaderName: "path/common/branch.d", + Linkname: "path/branch.d", + TypeFlag: 50, + }, + }, + // below is the same as ByBasename() + { + File: file.Reference{RealPath: "/path/branch.d"}, + Metadata: file.Metadata{ + Path: "/path/branch.d", + TarHeaderName: "path/branch.d/", + TypeFlag: 53, + IsDir: true, + }, + }, + { + File: file.Reference{RealPath: "/path/common/branch.d"}, + Metadata: file.Metadata{ + Path: "/path/common/branch.d", + TarHeaderName: "path/common/branch.d", + Linkname: "path/branch.d", + TypeFlag: 50, // symlink + }, + }, + }, + }, + { + name: "get symlink name", + input: "file?1.d", + want: []FileCatalogEntry{ + { + File: file.Reference{RealPath: "/path/common/file-1.d"}, + Metadata: file.Metadata{ + Path: "/path/common/file-1.d", + TarHeaderName: "path/common/file-1.d", + Linkname: "path/branch.d/one/file-1.txt", + TypeFlag: 50, // symlink + }, + }, + }, + }, + { + name: "get basename with path expression", + input: "somewhere/file?1.d", + wantErr: require.Error, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = require.NoError + } + actual, err := 
fileCatalog.GetByBasenameGlob(tt.input) + tt.wantErr(t, err) + if err != nil { + return + } + if d := cmp.Diff(tt.want, actual, + cmpopts.EquateEmpty(), + cmpopts.IgnoreUnexported(file.Reference{}), + cmpopts.IgnoreFields(file.Metadata{}, "Mode", "GroupID", "UserID", "Size", "TarSequence"), + cmpopts.IgnoreFields(FileCatalogEntry{}, "Contents")); d != "" { + t.Errorf("diff: %s", d) + } + }) + } +} + +func TestFileCatalog_GetByMimeType(t *testing.T) { + fixtureTarFile := getTarFixture(t, "fixture-2") + + ft := filetree.NewFileTree() + fileCatalog := NewFileCatalog() + var size int64 + + // we don't need the index itself, just the side effect on the file catalog after indexing + _, err := file.NewTarIndex( + fixtureTarFile.Name(), + layerTarIndexer(ft, &fileCatalog, &size, nil, nil), + ) + require.NoError(t, err) + + tests := []struct { + name string + input string + want []FileCatalogEntry + wantErr require.ErrorAssertionFunc + }{ + { + name: "get existing file mimetype", + input: "text/plain", + want: []FileCatalogEntry{ + { + File: file.Reference{RealPath: "/path/branch.d/one/.file-4.tar.gz"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/.file-4.tar.gz", + TarHeaderName: "path/branch.d/one/.file-4.tar.gz", + TypeFlag: 48, + MIMEType: "text/plain", + }, + }, + { + File: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-1.txt", + TarHeaderName: "path/branch.d/one/file-1.txt", + TypeFlag: 48, + MIMEType: "text/plain", + }, + }, + { + File: file.Reference{RealPath: "/path/branch.d/one/file-4.d"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-4.d", + TarHeaderName: "path/branch.d/one/file-4.d", + TypeFlag: 48, + MIMEType: "text/plain", + }, + }, + { + File: file.Reference{RealPath: "/path/branch.d/one/file-4.tar.gz"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-4.tar.gz", + TarHeaderName: "path/branch.d/one/file-4.tar.gz", + TypeFlag: 48, + MIMEType: 
"text/plain", + }, + }, + { + File: file.Reference{RealPath: "/path/branch.d/two/file-2.txt"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/two/file-2.txt", + TarHeaderName: "path/branch.d/two/file-2.txt", + TypeFlag: 48, + MIMEType: "text/plain", + }, + }, + { + File: file.Reference{RealPath: "/path/file-3.txt"}, + Metadata: file.Metadata{ + Path: "/path/file-3.txt", + TarHeaderName: "path/file-3.txt", + TypeFlag: 48, + MIMEType: "text/plain", + }, + }, + }, + }, + { + name: "get non-existing mimetype", + input: "text/bogus", + want: []FileCatalogEntry{}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = require.NoError + } + actual, err := fileCatalog.GetByMIMEType(tt.input) + tt.wantErr(t, err) + if err != nil { + return + } + if d := cmp.Diff(tt.want, actual, + cmpopts.EquateEmpty(), + cmpopts.IgnoreUnexported(file.Reference{}), + cmpopts.IgnoreFields(file.Metadata{}, "Mode", "GroupID", "UserID", "Size", "TarSequence"), + cmpopts.IgnoreFields(FileCatalogEntry{}, "Contents")); d != "" { + t.Errorf("diff: %s", d) + } + }) + } +} + +func TestFileCatalog_GetBasenames(t *testing.T) { + fixtureTarFile := getTarFixture(t, "fixture-2") + + ft := filetree.NewFileTree() + fileCatalog := NewFileCatalog() + var size int64 + + // we don't need the index itself, just the side effect on the file catalog after indexing + _, err := file.NewTarIndex( + fixtureTarFile.Name(), + layerTarIndexer(ft, &fileCatalog, &size, nil, nil), + ) + require.NoError(t, err) + + tests := []struct { + name string + want []string + }{ + { + name: "go case", + want: []string{ + ".file-4.tar.gz", + "branch", + "branch.d", + "branch.d", + "common", + "file-1.d", + "file-1.txt", + "file-2.txt", + "file-3.txt", + "file-4", + "file-4.d", + "file-4.tar.gz", + "one", + "path", + "two", + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + actual := fileCatalog.Basenames() + assert.ElementsMatchf(t, tt.want, 
actual, "diff: %s", cmp.Diff(tt.want, actual)) + }) + } +} + +func getTarFixture(t *testing.T, name string) *os.File { + generatorScriptName := name + ".sh" + generatorScriptPath := path.Join(fixturesGeneratorsPath, generatorScriptName) + if !fileExists(t, generatorScriptPath) { + t.Fatalf("no tar generator script for fixture '%s'", generatorScriptPath) + } + + version := fixtureVersion(t, generatorScriptPath) + tarName := name + ":" + version + ".tar" + tarFixturePath := path.Join(tarCachePath, tarName) + + if !fileExists(t, tarFixturePath) { + t.Logf("Creating tar fixture: %s", tarFixturePath) + + fullPath, err := filepath.Abs(tarFixturePath) + if err != nil { + t.Fatal(err) + } + + cmd := exec.Command("./"+generatorScriptName, fullPath) + cmd.Env = os.Environ() + cmd.Dir = fixturesGeneratorsPath + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + cmd.Stdin = os.Stdin + + err = cmd.Run() + if err != nil { + panic(err) + } + } + + fh, err := os.Open(tarFixturePath) if err != nil { - t.Fatalf("could not get contents by ref: %+v", err) + t.Fatalf("could not open tar fixture '%s'", tarFixturePath) } - actual, err := io.ReadAll(reader) + t.Cleanup(func() { + require.NoError(t, fh.Close()) + }) + + return fh +} + +func fixtureVersion(t *testing.T, path string) string { + t.Helper() + f, err := os.Open(path) if err != nil { - t.Fatalf("could not read content reader: %+v", err) + t.Fatal(err) } + defer func() { + err := f.Close() + if err != nil { + t.Fatal(err) + } + }() - for _, d := range deep.Equal([]byte(expected), actual) { - t.Errorf("diff: %+v", d) + h := sha256.New() + if _, err := io.Copy(h, f); err != nil { + t.Fatal(err) } + + return fmt.Sprintf("%x", h.Sum(nil)) +} + +func fileExists(t *testing.T, filename string) bool { + t.Helper() + info, err := os.Stat(filename) + if os.IsNotExist(err) { + return false + } else if err != nil { + t.Fatal(err) + } + return !info.IsDir() } diff --git a/pkg/image/image.go b/pkg/image/image.go index 0748c2c4..8d7400c5 100644 
--- a/pkg/image/image.go +++ b/pkg/image/image.go @@ -280,7 +280,22 @@ func (i *Image) FilesByMIMETypeFromSquash(mimeTypes ...string) ([]file.Reference return refs, nil } -// FileContentsByRef fetches file contents for a single file reference, irregardless of the source layer. +// FilesByExtensionFromSquash returns file references for files that have the given extension relative to the squash tree. +func (i *Image) FilesByExtensionFromSquash(extension string) ([]file.Reference, error) { + return fetchFilesByExtension(i.SquashedTree(), &i.FileCatalog, extension) +} + +// FilesByBasenameFromSquash returns file references for files with the given basename relative to the squash tree. +func (i *Image) FilesByBasenameFromSquash(basename string) ([]file.Reference, error) { + return fetchFilesByBasename(i.SquashedTree(), &i.FileCatalog, basename) +} + +// FilesByBasenameGlobFromSquash returns file references for files with the given basename glob pattern relative to the squash tree. +func (i *Image) FilesByBasenameGlobFromSquash(glob string) ([]file.Reference, error) { + return fetchFilesByBasenameGlob(i.SquashedTree(), &i.FileCatalog, glob) +} + +// FileContentsByRef fetches file contents for a single file reference, regardless of the source layer. // If the path does not exist an error is returned. 
func (i *Image) FileContentsByRef(ref file.Reference) (io.ReadCloser, error) { return i.FileCatalog.FileContents(ref) diff --git a/pkg/image/layer.go b/pkg/image/layer.go index 96fe14c3..99ebba45 100644 --- a/pkg/image/layer.go +++ b/pkg/image/layer.go @@ -108,7 +108,10 @@ func (l *Layer) Read(catalog *FileCatalog, imgMetadata Metadata, idx int, uncomp return err } - l.indexedContent, err = file.NewTarIndex(tarFilePath, l.indexer(monitor)) + l.indexedContent, err = file.NewTarIndex( + tarFilePath, + layerTarIndexer(l.Tree, l.fileCatalog, &l.Metadata.Size, l, monitor), + ) if err != nil { return fmt.Errorf("failed to read layer=%q tar : %w", l.Metadata.Digest, err) } @@ -139,7 +142,7 @@ func (l *Layer) Read(catalog *FileCatalog, imgMetadata Metadata, idx int, uncomp return nil } -// FetchContents reads the file contents for the given path from the underlying layer blob, relative to the layers "diff tree". +// FileContents reads the file contents for the given path from the underlying layer blob, relative to the layers "diff tree". // An error is returned if there is no file at the given path and layer or the read operation cannot continue. func (l *Layer) FileContents(path file.Path) (io.ReadCloser, error) { return fetchFileContentsByPath(l.Tree, l.fileCatalog, path) @@ -177,7 +180,37 @@ func (l *Layer) FilesByMIMETypeFromSquash(mimeTypes ...string) ([]file.Reference return refs, nil } -func (l *Layer) indexer(monitor *progress.Manual) file.TarIndexVisitor { +// FilesByExtension returns file references for files that have the given extension. +func (l *Layer) FilesByExtension(extension string) ([]file.Reference, error) { + return fetchFilesByExtension(l.Tree, l.fileCatalog, extension) +} + +// FilesByExtensionFromSquash returns file references for files have the given extension relative to the squash tree. 
+func (l *Layer) FilesByExtensionFromSquash(extension string) ([]file.Reference, error) { + return fetchFilesByExtension(l.SquashedTree, l.fileCatalog, extension) +} + +// FilesByBasename returns file references for files that have the given basename. +func (l *Layer) FilesByBasename(basename string) ([]file.Reference, error) { + return fetchFilesByBasename(l.Tree, l.fileCatalog, basename) +} + +// FilesByBasenameFromSquash returns file references for files by name relative to the squash tree. +func (l *Layer) FilesByBasenameFromSquash(extension string) ([]file.Reference, error) { + return fetchFilesByBasename(l.SquashedTree, l.fileCatalog, extension) +} + +// FilesByBasenameGlob returns file references for files that have the given basename glob. +func (l *Layer) FilesByBasenameGlob(glob string) ([]file.Reference, error) { + return fetchFilesByBasenameGlob(l.Tree, l.fileCatalog, glob) +} + +// FilesByBasenameGlobFromSquash returns file references for files by basename glob pattern relative to the squash tree. 
+func (l *Layer) FilesByBasenameGlobFromSquash(glob string) ([]file.Reference, error) { + return fetchFilesByBasenameGlob(l.SquashedTree, l.fileCatalog, glob) +} + +func layerTarIndexer(ft *filetree.FileTree, fileCatalog *FileCatalog, size *int64, layerRef *Layer, monitor *progress.Manual) file.TarIndexVisitor { return func(index file.TarIndexEntry) error { var err error var entry = index.ToTarFileEntry() @@ -203,22 +236,22 @@ func (l *Layer) indexer(monitor *progress.Manual) file.TarIndexVisitor { var fileReference *file.Reference switch metadata.TypeFlag { case tar.TypeSymlink: - fileReference, err = l.Tree.AddSymLink(file.Path(metadata.Path), file.Path(metadata.Linkname)) + fileReference, err = ft.AddSymLink(file.Path(metadata.Path), file.Path(metadata.Linkname)) if err != nil { return err } case tar.TypeLink: - fileReference, err = l.Tree.AddHardLink(file.Path(metadata.Path), file.Path(metadata.Linkname)) + fileReference, err = ft.AddHardLink(file.Path(metadata.Path), file.Path(metadata.Linkname)) if err != nil { return err } case tar.TypeDir: - fileReference, err = l.Tree.AddDir(file.Path(metadata.Path)) + fileReference, err = ft.AddDir(file.Path(metadata.Path)) if err != nil { return err } default: - fileReference, err = l.Tree.AddFile(file.Path(metadata.Path)) + fileReference, err = ft.AddFile(file.Path(metadata.Path)) if err != nil { return err } @@ -227,10 +260,14 @@ func (l *Layer) indexer(monitor *progress.Manual) file.TarIndexVisitor { return fmt.Errorf("could not add path=%q link=%q during tar iteration", metadata.Path, metadata.Linkname) } - l.Metadata.Size += metadata.Size - l.fileCatalog.Add(*fileReference, metadata, l, index.Open) + if size != nil { + *(size) += metadata.Size + } + fileCatalog.Add(*fileReference, metadata, layerRef, index.Open) - monitor.N++ + if monitor != nil { + monitor.N++ + } return nil } } diff --git a/pkg/image/test-fixtures/generators/fixture-2.sh b/pkg/image/test-fixtures/generators/fixture-2.sh new file mode 100755 index 
00000000..0c7b2f19 --- /dev/null +++ b/pkg/image/test-fixtures/generators/fixture-2.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash +set -ue + +realpath() { + [[ $1 = /* ]] && echo "$1" || echo "$PWD/${1#./}" +} + +FIXTURE_TAR_PATH=$1 +FIXTURE_NAME=$(basename $FIXTURE_TAR_PATH) +FIXTURE_DIR=$(realpath $(dirname $FIXTURE_TAR_PATH)) + +# note: since tar --sort is not an option on mac, and we want these generation scripts to be generally portable, we've +# elected to use docker to generate the tar +docker run --rm -i \ + -u $(id -u):$(id -g) \ + -v ${FIXTURE_DIR}:/scratch \ + -w /scratch \ + ubuntu:latest \ + /bin/bash -xs < path/branch.d/one/file-1.txt + echo "forth file" > path/branch.d/one/file-4.d + echo "multi ext file" > path/branch.d/one/file-4.tar.gz + echo "hidden file" > path/branch.d/one/.file-4.tar.gz + + ln -s path/branch.d path/common/branch.d + ln -s path/branch.d path/common/branch + ln -s path/branch.d/one/file-4.d path/common/file-4 + ln -s path/branch.d/one/file-1.txt path/common/file-1.d + + echo "second file" > path/branch.d/two/file-2.txt + + echo "third file" > path/file-3.txt + + # permissions + chmod -R 755 path + chmod -R 700 path/branch/one/ + chmod 664 path/file-3.txt + + # tar + owner + # note: sort by name is important for test file header entry ordering + tar --sort=name --owner=1337 --group=5432 -cvf "/scratch/${FIXTURE_NAME}" path/ + +popd +EOF From 2f5d2d06c95cefbd3541b9c5166625bd94c8e692 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Fri, 27 Jan 2023 13:28:25 -0500 Subject: [PATCH 02/35] [wip] link resolution Signed-off-by: Alex Goodman --- pkg/file/reference.go | 65 +++ pkg/filetree/depth_first_path_walker.go | 17 +- pkg/filetree/filetree.go | 242 +++++++---- pkg/filetree/filetree_test.go | 527 +++++++++++++++--------- pkg/filetree/glob.go | 14 +- pkg/filetree/union_filetree_test.go | 8 +- pkg/image/content_helpers.go | 42 +- pkg/image/file_catalog.go | 47 ++- pkg/image/file_catalog_test.go | 39 +- pkg/image/image.go | 16 +- 
pkg/image/layer.go | 20 +- 11 files changed, 644 insertions(+), 393 deletions(-) diff --git a/pkg/file/reference.go b/pkg/file/reference.go index 47ed8ae9..d200dd77 100644 --- a/pkg/file/reference.go +++ b/pkg/file/reference.go @@ -9,12 +9,77 @@ var nextID = 0 // ID is used for file tree manipulation to uniquely identify tree nodes. type ID uint64 +type LinkResolution struct { + AncestorResolution []ReferenceAccess + LeafResolution []ReferenceAccess +} + +// ReferenceAccess represents the fetching of a file reference via a (possibly different) path. +type ReferenceAccess struct { + RequestPath Path + *Reference +} + +// ReferenceVia represents a unique file, and how it was accessed, showing full symlink resolution. +type ReferenceVia struct { + ReferenceAccess + LinkResolution +} + +// RequestPaths represents the traversal through the filesystem to access to current reference, including all symlink and hardlink resolution. +func (f *ReferenceVia) RequestPaths() []Path { + //paths := []Path{f.RequestPath} + var paths []Path + for _, p := range f.LeafResolution { + paths = append(paths, p.RequestPath) + } + return paths +} + +// AccessReferences represents the traversal through the filesystem to access to current reference, including all symlink and hardlink resolution. +func (f *ReferenceVia) AccessReferences() []*Reference { + var refs []*Reference + for _, p := range f.LeafResolution { + refs = append(refs, p.Reference) + } + //refs = append(refs, f.Reference) + return refs +} + +// RealPaths represents the traversal through the filesystem to access to current reference, including all symlink and hardlink resolution. +func (f *ReferenceVia) RealPaths() []Path { + var refs []Path + for _, p := range f.LeafResolution { + if p.Reference != nil { + refs = append(refs, p.Reference.RealPath) + } + } + //if f.Reference != nil { + // refs = append(refs, f.Reference.RealPath) + //} + return refs +} + // Reference represents a unique file. 
This is useful when path is not good enough (i.e. you have the same file path for two files in two different container image layers, and you need to be able to distinguish them apart) type Reference struct { id ID RealPath Path // file path with NO symlinks or hardlinks in constituent paths } +// NewFileReferenceVia shows how a reference was accessed. +func NewFileReferenceVia(path Path, ref *Reference, ancestors []ReferenceAccess, leafs []ReferenceAccess) *ReferenceVia { + return &ReferenceVia{ + ReferenceAccess: ReferenceAccess{ + RequestPath: path, + Reference: ref, + }, + LinkResolution: LinkResolution{ + AncestorResolution: ancestors, + LeafResolution: leafs, + }, + } +} + // NewFileReference creates a new unique file reference for the given path. func NewFileReference(path Path) *Reference { nextID++ diff --git a/pkg/filetree/depth_first_path_walker.go b/pkg/filetree/depth_first_path_walker.go index f246d8a9..54c6e48a 100644 --- a/pkg/filetree/depth_first_path_walker.go +++ b/pkg/filetree/depth_first_path_walker.go @@ -54,16 +54,17 @@ func NewDepthFirstPathWalker(tree *FileTree, visitor FileNodeVisitor, conditions return w } -//nolint:gocognit -func (w *DepthFirstPathWalker) Walk(from file.Path) (file.Path, *filenode.FileNode, error) { +// nolint:gocognit +func (w *DepthFirstPathWalker) Walk(from file.Path) (file.Path, *nodeAccess, error) { w.pathStack.Push(from) var currentPath file.Path - var currentNode *filenode.FileNode + var currentNode *nodeAccess var err error for w.pathStack.Size() > 0 { currentPath = w.pathStack.Pop() + // TODO: should we make these link resolutions configurable so you can observe the links on walk as well? 
currentNode, err = w.tree.node(currentPath, linkResolutionStrategy{ FollowAncestorLinks: true, FollowBasenameLinks: true, @@ -72,7 +73,7 @@ func (w *DepthFirstPathWalker) Walk(from file.Path) (file.Path, *filenode.FileNo if err != nil { return "", nil, err } - if currentNode == nil { + if currentNode == nil || currentNode.FileNode == nil { return "", nil, fmt.Errorf("nil Node at path=%q", currentPath) } @@ -81,15 +82,15 @@ func (w *DepthFirstPathWalker) Walk(from file.Path) (file.Path, *filenode.FileNo return currentPath, currentNode, ErrMaxTraversalDepth } - if w.conditions.ShouldTerminate != nil && w.conditions.ShouldTerminate(currentPath, *currentNode) { + if w.conditions.ShouldTerminate != nil && w.conditions.ShouldTerminate(currentPath, *currentNode.FileNode) { return currentPath, currentNode, nil } currentPath = currentPath.Normalize() // visit if w.visitor != nil && !w.visitedPaths.Contains(currentPath) { - if w.conditions.ShouldVisit == nil || w.conditions.ShouldVisit != nil && w.conditions.ShouldVisit(currentPath, *currentNode) { - err := w.visitor(currentPath, *currentNode) + if w.conditions.ShouldVisit == nil || w.conditions.ShouldVisit != nil && w.conditions.ShouldVisit(currentPath, *currentNode.FileNode) { + err := w.visitor(currentPath, *currentNode.FileNode) if err != nil { return currentPath, currentNode, err } @@ -97,7 +98,7 @@ func (w *DepthFirstPathWalker) Walk(from file.Path) (file.Path, *filenode.FileNo } } - if w.conditions.ShouldContinueBranch != nil && !w.conditions.ShouldContinueBranch(currentPath, *currentNode) { + if w.conditions.ShouldContinueBranch != nil && !w.conditions.ShouldContinueBranch(currentPath, *currentNode.FileNode) { continue } diff --git a/pkg/filetree/filetree.go b/pkg/filetree/filetree.go index 5ca0f413..93351680 100644 --- a/pkg/filetree/filetree.go +++ b/pkg/filetree/filetree.go @@ -18,6 +18,21 @@ import ( var ErrRemovingRoot = errors.New("cannot remove the root path (`/`) from the FileTree") var ErrLinkCycleDetected = 
errors.New("cycle during symlink resolution") +// nodeAccess represents a request into the tree for a specific path and the resulting node, which may have a different path. +type nodeAccess struct { + RequestPath file.Path + FileNode *filenode.FileNode // note: it is important that nodeAccess does not behave like FileNode (then it can be added to the tree directly) + AncestorLinkResolution []nodeAccess + LeafLinkResolution []nodeAccess +} + +func (na *nodeAccess) HasFileNode() bool { + if na == nil { + return false + } + return na.FileNode != nil +} + // FileTree represents a file/directory Tree type FileTree struct { tree *tree.Tree @@ -87,12 +102,12 @@ func (t *FileTree) ListPaths(dir file.Path) ([]file.Path, error) { return nil, nil } - if n.FileType != file.TypeDir { + if n.FileNode.FileType != file.TypeDir { return nil, nil } var listing []file.Path - children := t.tree.Children(n) + children := t.tree.Children(n.FileNode) for _, child := range children { if child == nil { continue @@ -106,13 +121,13 @@ func (t *FileTree) ListPaths(dir file.Path) ([]file.Path, error) { return nil, err } - listing = append(listing, file.Path(path.Join(string(dir), fn.RealPath.Basename()))) + listing = append(listing, file.Path(path.Join(string(dir), fn.FileNode.RealPath.Basename()))) } return listing, nil } // File fetches a file.Reference for the given path. Returns nil if the path does not exist in the FileTree. -func (t *FileTree) File(path file.Path, options ...LinkResolutionOption) (bool, *file.Reference, error) { +func (t *FileTree) File(path file.Path, options ...LinkResolutionOption) (bool, *file.ReferenceVia, error) { userStrategy := newLinkResolutionStrategy(options...) // For: /some/path/here // Where: /some/path -> /other/place @@ -131,30 +146,53 @@ func (t *FileTree) File(path file.Path, options ...LinkResolutionOption) (bool, // // Therefore we can safely lookup the path first without worrying about symlink resolution yet... if there is a // hit, return it! 
If not, fallback to symlink resolution. - - currentNode, err := t.node(path, linkResolutionStrategy{}) - if err != nil { - return false, nil, err - } - if currentNode != nil && (!currentNode.IsLink() || currentNode.IsLink() && !userStrategy.FollowBasenameLinks) { - return true, currentNode.Reference, nil - } + //currentNode, err := t.node(path, linkResolutionStrategy{}) + //if err != nil { + // return false, nil, err + //} + //if currentNode.HasFileNode() && (!currentNode.FileNode.IsLink() || currentNode.FileNode.IsLink() && !userStrategy.FollowBasenameLinks) { + // return true, file.NewFileReferenceVia( + // path, + // currentNode.FileNode.Reference, + // newReferenceAccessPath(currentNode.AncestorLinkResolution), + // newReferenceAccessPath(currentNode.LeafLinkResolution), + // ), nil + //} // symlink resolution!... within the context of container images (which is outside of the responsibility of this object) // the only really valid resolution of symlinks is in squash trees (both for an image and a layer --NOT for trees // that represent a single union FS layer. 
- currentNode, err = t.node(path, linkResolutionStrategy{ + + currentNode, err := t.node(path, linkResolutionStrategy{ FollowAncestorLinks: true, FollowBasenameLinks: userStrategy.FollowBasenameLinks, DoNotFollowDeadBasenameLinks: userStrategy.DoNotFollowDeadBasenameLinks, }) - if currentNode != nil { - return true, currentNode.Reference, err + if currentNode.HasFileNode() { + return true, file.NewFileReferenceVia( + path, + currentNode.FileNode.Reference, + newReferenceAccessPath(currentNode.AncestorLinkResolution), + newReferenceAccessPath(currentNode.LeafLinkResolution), + ), err } return false, nil, err } -func (t *FileTree) node(p file.Path, strategy linkResolutionStrategy) (*filenode.FileNode, error) { +func newReferenceAccessPath(nodePath []nodeAccess) []file.ReferenceAccess { + var refPath []file.ReferenceAccess + for _, n := range nodePath { + refPath = append(refPath, + file.ReferenceAccess{ + Reference: n.FileNode.Reference, + RequestPath: n.RequestPath, + }, + ) + } + return refPath +} + +func (t *FileTree) node(p file.Path, strategy linkResolutionStrategy) (*nodeAccess, error) { normalizedPath := p.Normalize() nodeID := filenode.IDByPath(normalizedPath) if !strategy.FollowLinks() { @@ -162,10 +200,13 @@ func (t *FileTree) node(p file.Path, strategy linkResolutionStrategy) (*filenode if n == nil { return nil, nil } - return n.(*filenode.FileNode), nil + return &nodeAccess{ + RequestPath: normalizedPath, + FileNode: n.(*filenode.FileNode), + }, nil } - var currentNode *filenode.FileNode + var currentNode *nodeAccess var err error if strategy.FollowAncestorLinks { currentNode, err = t.resolveAncestorLinks(normalizedPath, nil) @@ -175,12 +216,15 @@ func (t *FileTree) node(p file.Path, strategy linkResolutionStrategy) (*filenode } else { n := t.tree.Node(nodeID) if n != nil { - currentNode = n.(*filenode.FileNode) + currentNode = &nodeAccess{ + RequestPath: normalizedPath, + FileNode: n.(*filenode.FileNode), + } } } // link resolution has come up with 
nothing, return what we have so far - if currentNode == nil { + if !currentNode.HasFileNode() { return currentNode, nil } @@ -192,20 +236,21 @@ func (t *FileTree) node(p file.Path, strategy linkResolutionStrategy) (*filenode // return FileNode of the basename in the given path (no resolution is done at or past the basename). Note: it is // assumed that the given path has already been normalized. -func (t *FileTree) resolveAncestorLinks(path file.Path, attemptedPaths internal.Set) (*filenode.FileNode, error) { +func (t *FileTree) resolveAncestorLinks(path file.Path, attemptedPaths internal.Set) (*nodeAccess, error) { // performance optimization... see if there is a node at the path (as if it is a real path). If so, // use it, otherwise, continue with ancestor resolution - currentNode, err := t.node(path, linkResolutionStrategy{}) + currentNodeAccess, err := t.node(path, linkResolutionStrategy{}) if err != nil { return nil, err } - if currentNode != nil { - return currentNode, nil + if currentNodeAccess.HasFileNode() { + return currentNodeAccess, nil } var pathParts = strings.Split(string(path), file.DirSeparator) var currentPathStr string var currentPath file.Path + var nodePath []nodeAccess // iterate through all parts of the path, replacing path elements with link resolutions where possible. for idx, part := range pathParts { @@ -219,13 +264,13 @@ func (t *FileTree) resolveAncestorLinks(path file.Path, attemptedPaths internal. currentPathStr = string(currentPath) // fetch the Node with NO link resolution strategy - currentNode, err = t.node(currentPath, linkResolutionStrategy{}) + currentNodeAccess, err = t.node(currentPath, linkResolutionStrategy{}) if err != nil { // should never occur return nil, err } - if currentNode == nil { + if !currentNodeAccess.HasFileNode() { // we've reached a point where the given path that has never been observed. This can happen for one reason: // 1. 
the current path is really invalid and we should return NIL indicating that it cannot be resolved. // 2. the current path is a link? no, this isn't possible since we are iterating through constituent paths @@ -234,39 +279,43 @@ func (t *FileTree) resolveAncestorLinks(path file.Path, attemptedPaths internal. } // keep track of what we've resolved to so far... - currentPath = currentNode.RealPath + currentPath = currentNodeAccess.FileNode.RealPath // this is positively a path, however, there is no information about this Node. This may be OK since we // allow for adding children before parents (and even don't require the parent to ever be added --which is // potentially valid given the underlying messy data [tar headers]). In this case we keep building the path // (which we've already done at this point) and continue. - if currentNode.Reference == nil { + if currentNodeAccess.FileNode.Reference == nil { continue } // by this point we definitely have a file reference, if this is a link (and not the basename) resolve any // links until the next Node is resolved (or not). isLastPart := idx == len(pathParts)-1 - if !isLastPart && currentNode.IsLink() { - currentNode, err = t.resolveNodeLinks(currentNode, true, attemptedPaths) + if !isLastPart && currentNodeAccess.FileNode.IsLink() { + currentNodeAccess, err = t.resolveNodeLinks(currentNodeAccess, true, attemptedPaths) if err != nil { // only expected to happen on cycles - return currentNode, err + currentNodeAccess.AncestorLinkResolution = append(currentNodeAccess.AncestorLinkResolution, nodePath...) 
+ return currentNodeAccess, err } - if currentNode != nil { - currentPath = currentNode.RealPath + if currentNodeAccess.HasFileNode() { + currentPath = currentNodeAccess.FileNode.RealPath } currentPathStr = string(currentPath) } + + nodePath = append(nodePath, *currentNodeAccess) } // by this point we have processed all constituent paths; there were no un-added paths and the path is guaranteed // to have followed link resolution. - return currentNode, nil + currentNodeAccess.AncestorLinkResolution = append(currentNodeAccess.AncestorLinkResolution, nodePath...) + return currentNodeAccess, nil } // followNode takes the given FileNode and resolves all links at the base of the real path for the node (this implies // that NO ancestors are considered). -func (t *FileTree) resolveNodeLinks(n *filenode.FileNode, followDeadBasenameLinks bool, attemptedPaths internal.Set) (*filenode.FileNode, error) { +func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, attemptedPaths internal.Set) (*nodeAccess, error) { if n == nil { return nil, fmt.Errorf("cannot resolve links with nil Node given") } @@ -277,42 +326,45 @@ func (t *FileTree) resolveNodeLinks(n *filenode.FileNode, followDeadBasenameLink } // note: this assumes that callers are passing paths in which the constituent parts are NOT symlinks - var lastNode *filenode.FileNode + var lastNode *nodeAccess + var nodePath []nodeAccess - currentNode := n + currentNodeAccess := n // keep resolving links until a regular file or directory is found alreadySeen := internal.NewStringSet() var err error for { + nodePath = append(nodePath, *currentNodeAccess) + // if there is no next path, return this reference (dead link) - if currentNode == nil { + if !currentNodeAccess.HasFileNode() { break } - if alreadySeen.Contains(string(currentNode.RealPath)) { + if alreadySeen.Contains(string(currentNodeAccess.FileNode.RealPath)) { return nil, ErrLinkCycleDetected } - if !currentNode.IsLink() { + if 
!currentNodeAccess.FileNode.IsLink() { // no resolution and there is no next link (pseudo dead link)... return what you found // any content fetches will fail, but that's ok break } // prepare for the next iteration - alreadySeen.Add(string(currentNode.RealPath)) + alreadySeen.Add(string(currentNodeAccess.FileNode.RealPath)) var nextPath file.Path - if currentNode.LinkPath.IsAbsolutePath() { + if currentNodeAccess.FileNode.LinkPath.IsAbsolutePath() { // use links with absolute paths blindly - nextPath = currentNode.LinkPath + nextPath = currentNodeAccess.FileNode.LinkPath } else { // resolve relative link paths var parentDir string - parentDir, _ = filepath.Split(string(currentNode.RealPath)) + parentDir, _ = filepath.Split(string(currentNodeAccess.FileNode.RealPath)) // assemble relative link path by normalizing: "/cur/dir/../file1.txt" --> "/cur/file1.txt" - nextPath = file.Path(path.Clean(path.Join(parentDir, string(currentNode.LinkPath)))) + nextPath = file.Path(path.Clean(path.Join(parentDir, string(currentNodeAccess.FileNode.LinkPath)))) } // no more links to follow @@ -321,7 +373,7 @@ func (t *FileTree) resolveNodeLinks(n *filenode.FileNode, followDeadBasenameLink } // preserve the current Node for the next loop (in case we shouldn't follow a potentially dead link) - lastNode = currentNode + lastNode = currentNodeAccess // break any cycles with non-existent paths (before attempting to look the path up again) if attemptedPaths.Contains(string(nextPath)) { @@ -330,18 +382,26 @@ func (t *FileTree) resolveNodeLinks(n *filenode.FileNode, followDeadBasenameLink // get the next Node (based on the next path) attemptedPaths.Add(string(nextPath)) - currentNode, err = t.resolveAncestorLinks(nextPath, attemptedPaths) + ancestorPaths := currentNodeAccess.AncestorLinkResolution + currentNodeAccess, err = t.resolveAncestorLinks(nextPath, attemptedPaths) + currentNodeAccess.AncestorLinkResolution = append(ancestorPaths, currentNodeAccess.AncestorLinkResolution...) 
if err != nil { + currentNodeAccess.LeafLinkResolution = append(currentNodeAccess.LeafLinkResolution, nodePath...) // only expected to occur upon cycle detection - return currentNode, err + return currentNodeAccess, err } } - if currentNode == nil && !followDeadBasenameLinks { + if !currentNodeAccess.HasFileNode() && !followDeadBasenameLinks { + // TODO: should we trim the node path too? + currentNodeAccess.LeafLinkResolution = append(currentNodeAccess.LeafLinkResolution, nodePath...) + return lastNode, nil } - return currentNode, nil + currentNodeAccess.LeafLinkResolution = append(currentNodeAccess.LeafLinkResolution, nodePath...) + + return currentNodeAccess, nil } // FilesByGlob fetches zero to many file.References for the given glob pattern (considers symlinks). @@ -379,7 +439,7 @@ func (t *FileTree) FilesByGlob(query string, options ...LinkResolutionOption) ([ if !path.IsAbs(match) { matchPath = file.Path(path.Join("/", match)) } - fn, err := t.node(matchPath, linkResolutionStrategy{ + fna, err := t.node(matchPath, linkResolutionStrategy{ FollowAncestorLinks: true, FollowBasenameLinks: true, DoNotFollowDeadBasenameLinks: doNotFollowDeadBasenameLinks, @@ -388,15 +448,15 @@ func (t *FileTree) FilesByGlob(query string, options ...LinkResolutionOption) ([ return nil, err } // the Node must exist and should not be a directory - if fn != nil && fn.FileType != file.TypeDir { + if fna.HasFileNode() && fna.FileNode.FileType != file.TypeDir { result := GlobResult{ MatchPath: matchPath, - RealPath: fn.RealPath, + RealPath: fna.FileNode.RealPath, // we should not be given a link Node UNLESS it is dead - IsDeadLink: fn.IsLink(), + IsDeadLink: fna.FileNode.IsLink(), } - if fn.Reference != nil { - result.Reference = *fn.Reference + if fna.FileNode.Reference != nil { + result.Reference = *fna.FileNode.Reference } results = append(results, result) } @@ -410,20 +470,20 @@ func (t *FileTree) FilesByGlob(query string, options ...LinkResolutionOption) ([ // hardlink resolution is 
performed on the given path --which implies that the given path MUST be a real path (have no // links in constituent paths) func (t *FileTree) AddFile(realPath file.Path) (*file.Reference, error) { - fn, err := t.node(realPath, linkResolutionStrategy{}) + fna, err := t.node(realPath, linkResolutionStrategy{}) if err != nil { return nil, err } - if fn != nil { + if fna.HasFileNode() { // this path already exists - if fn.FileType != file.TypeReg { + if fna.FileNode.FileType != file.TypeReg { return nil, fmt.Errorf("path=%q already exists but is NOT a regular file", realPath) } // this is a regular file, provide a new or existing file.Reference - if fn.Reference == nil { - fn.Reference = file.NewFileReference(realPath) + if fna.FileNode.Reference == nil { + fna.FileNode.Reference = file.NewFileReference(realPath) } - return fn.Reference, nil + return fna.FileNode.Reference, nil } // this is a new path... add the new Node + parents @@ -438,20 +498,20 @@ func (t *FileTree) AddFile(realPath file.Path) (*file.Reference, error) { // link path captured and returned. 
Note: NO symlink or hardlink resolution is performed on the given path --which // implies that the given path MUST be a real path (have no links in constituent paths) func (t *FileTree) AddSymLink(realPath file.Path, linkPath file.Path) (*file.Reference, error) { - fn, err := t.node(realPath, linkResolutionStrategy{}) + fna, err := t.node(realPath, linkResolutionStrategy{}) if err != nil { return nil, err } - if fn != nil { + if fna.HasFileNode() { // this path already exists - if fn.FileType != file.TypeSymlink { + if fna.FileNode.FileType != file.TypeSymlink { return nil, fmt.Errorf("path=%q already exists but is NOT a symlink file", realPath) } // this is a symlink file, provide a new or existing file.Reference - if fn.Reference == nil { - fn.Reference = file.NewFileReference(realPath) + if fna.FileNode.Reference == nil { + fna.FileNode.Reference = file.NewFileReference(realPath) } - return fn.Reference, nil + return fna.FileNode.Reference, nil } // this is a new path... add the new Node + parents @@ -466,20 +526,20 @@ func (t *FileTree) AddSymLink(realPath file.Path, linkPath file.Path) (*file.Ref // path captured and returned. 
Note: NO symlink or hardlink resolution is performed on the given path --which // implies that the given path MUST be a real path (have no links in constituent paths) func (t *FileTree) AddHardLink(realPath file.Path, linkPath file.Path) (*file.Reference, error) { - fn, err := t.node(realPath, linkResolutionStrategy{}) + fna, err := t.node(realPath, linkResolutionStrategy{}) if err != nil { return nil, err } - if fn != nil { + if fna.HasFileNode() { // this path already exists - if fn.FileType != file.TypeHardLink { + if fna.FileNode.FileType != file.TypeHardLink { return nil, fmt.Errorf("path=%q already exists but is NOT a symlink file", realPath) } // this is a symlink file, provide a new or existing file.Reference - if fn.Reference == nil { - fn.Reference = file.NewFileReference(realPath) + if fna.FileNode.Reference == nil { + fna.FileNode.Reference = file.NewFileReference(realPath) } - return fn.Reference, nil + return fna.FileNode.Reference, nil } // this is a new path... add the new Node + parents @@ -497,20 +557,20 @@ func (t *FileTree) AddHardLink(realPath file.Path, linkPath file.Path) (*file.Re // Note: NO symlink or hardlink resolution is performed on the given path --which implies that the given path MUST // be a real path (have no links in constituent paths) func (t *FileTree) AddDir(realPath file.Path) (*file.Reference, error) { - fn, err := t.node(realPath, linkResolutionStrategy{}) + fna, err := t.node(realPath, linkResolutionStrategy{}) if err != nil { return nil, err } - if fn != nil { + if fna.HasFileNode() { // this path already exists - if fn.FileType != file.TypeDir { + if fna.FileNode.FileType != file.TypeDir { return nil, fmt.Errorf("path=%q already exists but is NOT a symlink file", realPath) } - // this is a symlink file, provide a new or existing file.Reference - if fn.Reference == nil { - fn.Reference = file.NewFileReference(realPath) + // this is a directory, provide a new or existing file.Reference + if fna.FileNode.Reference == nil { 
+ fna.FileNode.Reference = file.NewFileReference(realPath) } - return fn.Reference, nil + return fna.FileNode.Reference, nil } // this is a new path... add the new Node + parents @@ -532,22 +592,22 @@ func (t *FileTree) addParentPaths(realPath file.Path) error { return fmt.Errorf("unable to determine parent path while adding path=%q: %w", realPath, err) } - fn, err := t.node(parentPath, linkResolutionStrategy{}) + fna, err := t.node(parentPath, linkResolutionStrategy{}) if err != nil { return err } - if fn == nil { + if !fna.HasFileNode() { // add parents of the Node until an existent parent is found it's important to do this in reverse order // to ensure we are checking the fewest amount of parents possible. var pathsToAdd []file.Path parentPaths := realPath.ConstituentPaths() for idx := len(parentPaths) - 1; idx >= 0; idx-- { - fn, err := t.node(parentPaths[idx], linkResolutionStrategy{}) + resolvedFna, err := t.node(parentPaths[idx], linkResolutionStrategy{}) if err != nil { return err } - if fn != nil { + if resolvedFna.HasFileNode() { break } pathsToAdd = append(pathsToAdd, parentPaths[idx]) @@ -588,7 +648,7 @@ func (t *FileTree) setFileNode(fn *filenode.FileNode) error { return fmt.Errorf("unable to find parent path=%q while adding path=%q", parentPath, fn.RealPath) } - return t.tree.AddChild(parentNode, fn) + return t.tree.AddChild(parentNode.FileNode, fn) } // RemovePath deletes the file.Reference from the FileTree by the given path. If the basename of the given path @@ -610,7 +670,7 @@ func (t *FileTree) RemovePath(path file.Path) error { return nil } - _, err = t.tree.RemoveNode(fn) + _, err = t.tree.RemoveNode(fn.FileNode) if err != nil { return err } @@ -632,7 +692,7 @@ func (t *FileTree) RemoveChildPaths(path file.Path) error { // can't remove child paths for Node that doesn't exist! 
return nil } - for _, child := range t.tree.Children(fn) { + for _, child := range t.tree.Children(fn.FileNode) { _, err := t.tree.RemoveNode(child) if err != nil { return err @@ -759,11 +819,11 @@ func (t *FileTree) merge(upper *FileTree) error { nodeCopy := *upperNode // keep original file references if the upper tree does not have them (only for the same file types) - if lowerNode != nil && lowerNode.Reference != nil && upperNode.Reference == nil && upperNode.FileType == lowerNode.FileType { - nodeCopy.Reference = lowerNode.Reference + if lowerNode != nil && lowerNode.FileNode.Reference != nil && upperNode.Reference == nil && upperNode.FileType == lowerNode.FileNode.FileType { + nodeCopy.Reference = lowerNode.FileNode.Reference } - if lowerNode != nil && upperNode.FileType != file.TypeDir && lowerNode.FileType == file.TypeDir { + if lowerNode != nil && upperNode.FileType != file.TypeDir && lowerNode.FileNode.FileType == file.TypeDir { // NOTE: both upperNode and lowerNode paths are the same, and does not have an effect // on removal of child paths err := t.RemoveChildPaths(upperNode.RealPath) diff --git a/pkg/filetree/filetree_test.go b/pkg/filetree/filetree_test.go index 0d3e0c27..db1d736a 100644 --- a/pkg/filetree/filetree_test.go +++ b/pkg/filetree/filetree_test.go @@ -1,9 +1,13 @@ package filetree import ( + "encoding/json" "errors" "fmt" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" "github.com/stretchr/testify/require" + "os" "testing" "github.com/anchore/stereoscope/internal" @@ -21,7 +25,7 @@ func TestFileTree_AddPath(t *testing.T) { } _, f, _ := tr.File(path) - if f != fileNode { + if f.Reference != fileNode { t.Fatal("expected pointer to the newly created fileNode") } } @@ -35,7 +39,7 @@ func TestFileTree_AddPathAndMissingAncestors(t *testing.T) { } _, f, _ := tr.File(path) - if f != fileNode { + if f.Reference != fileNode { t.Fatal("expected pointer to the newly created fileNode") } @@ -46,7 +50,7 @@ func 
TestFileTree_AddPathAndMissingAncestors(t *testing.T) { if err != nil { t.Fatalf("could not get parent Node: %+v", err) } - children := tr.tree.Children(n) + children := tr.tree.Children(n.FileNode) if len(children) != 1 { t.Fatal("unexpected child count", len(children)) @@ -364,10 +368,12 @@ func TestFileTree_Merge_Overwrite(t *testing.T) { func TestFileTree_Merge_OpaqueWhiteout(t *testing.T) { tr1 := NewFileTree() - tr1.AddFile("/home/wagoodman/awesome/file.txt") + _, err := tr1.AddFile("/home/wagoodman/awesome/file.txt") + require.NoError(t, err) tr2 := NewFileTree() - tr2.AddFile("/home/wagoodman/.wh..wh..opq") + _, err = tr2.AddFile("/home/wagoodman/.wh..wh..opq") + require.NoError(t, err) if err := tr1.merge(tr2); err != nil { t.Fatalf("error on merge : %+v", err) @@ -455,7 +461,7 @@ func TestFileTree_Merge_DirOverride(t *testing.T) { t.Fatalf("somehow override path does not exist?") } - if n.FileType != file.TypeDir { + if n.FileNode.FileType != file.TypeDir { t.Errorf("did not override to dir") } @@ -494,178 +500,312 @@ func TestFileTree_Merge_RemoveChildPathsOnOverride(t *testing.T) { t.Fatalf("somehow override path does not exist?") } - if fileNode.FileType != file.TypeReg { + if fileNode.FileNode.FileType != file.TypeReg { t.Errorf("did not override to dir") } } +func TestFileTree_File_MultiSymlink(t *testing.T) { + var err error + tr := NewFileTree() + + _, err = tr.AddSymLink("/home", "/link-to-1/link-to-place") + require.NoError(t, err) + + _, err = tr.AddSymLink("/link-to-1", "/1") + require.NoError(t, err) + + _, err = tr.AddDir("/1") + require.NoError(t, err) + + _, err = tr.AddFile("/2/real-file.txt") + require.NoError(t, err) + + _, err = tr.AddSymLink("/1/file.txt", "/2/real-file.txt") + require.NoError(t, err) + + _, err = tr.AddSymLink("/1/link-to-place", "/place") + require.NoError(t, err) + + _, err = tr.AddSymLink("/place/wagoodman/file.txt", "/link-to-1/file.txt") + require.NoError(t, err) + + // this is the current state of the filetree + 
// . + // ├── 1 + // │ ├── file.txt -> 2/real-file.txt + // │ └── link-to-place -> place + // ├── 2 + // │ └── real-file.txt + // ├── home -> link-to-1/link-to-place + // ├── link-to-1 -> 1 + // └── place + // └── wagoodman + // └── file.txt -> link-to-1/file.txt + + // request: /home/wagoodman/file.txt + // reference: /2/real-file.txt + // ancestor resolution: + // - /home -> /link-to-1/link-to-place + // - /link-to-1 -> /1 + // - /1/link-to-place -> /place + // leaf resolution: + // - /place/wagoodman/file.txt -> /link-to-1/file.txt + // - /link-to-1 -> /1 + // - /1/file.txt -> /2/real-file.txt + // path: + // - home -> link-to-1/link-to-place -> place + // - place/wagoodman + // - place/wagoodman/file.txt -> link-to-1/file.txt -> 1/file.txt -> 2/real-file.txt + + requestPath := "/home/wagoodman/file.txt" + linkOptions := []LinkResolutionOption{FollowBasenameLinks} + _, ref, err := tr.File(file.Path(requestPath), linkOptions...) + require.NoError(t, err) + enc := json.NewEncoder(os.Stdout) + enc.SetIndent("", " ") + require.NoError(t, enc.Encode(ref)) + //t.Fatal("nope") + +} + func TestFileTree_File_Symlink(t *testing.T) { tests := []struct { - name string - buildLinkSource file.Path // ln -s DEST - buildLinkDest file.Path // ln -s SOURCE - buildRealPath file.Path // a real file that should exist (or not if "") - linkOptions []LinkResolutionOption - requestPath file.Path // the path to check against - expectedExists bool // if the request path should exist or not - expectedResolvedPath file.Path // the expected path for a request result - expectedErr bool // if an error is expected from the request - expectedRealRef bool // if the resolved reference should match the built reference from "buildRealPath" + name string + buildLinkSource file.Path // ln -s DEST + buildLinkDest file.Path // ln -s SOURCE + buildRealPath file.Path // a real file that should exist (or not if "") + linkOptions []LinkResolutionOption + requestPath file.Path // the path to check against + 
expectedExists bool // if the request path should exist or not + expectedErr bool // if an error is expected from the request + expectedRealRef bool // if the resolved reference should match the built reference from "buildRealPath" + expected *file.ReferenceVia }{ /////////////// - { - name: "request base is ABSOLUTE symlink", - buildLinkSource: "/home", - buildLinkDest: "/another/place", - buildRealPath: "/another/place", - linkOptions: []LinkResolutionOption{FollowBasenameLinks}, - requestPath: "/home", - expectedExists: true, - expectedResolvedPath: "/another/place", - // /another/place is the "real" reference that we followed, so we should expect the IDs to match upon lookup - expectedRealRef: true, - }, - { - name: "request base is ABSOLUTE symlink", - buildLinkSource: "/home", - buildLinkDest: "/another/place", - buildRealPath: "/another/place", - linkOptions: []LinkResolutionOption{}, - requestPath: "/home", - expectedExists: true, - expectedResolvedPath: "/home", - // /home is just a symlink, not the real file (which is at /another/place) - expectedRealRef: false, - }, - - /////////////// - { - name: "request parent is ABSOLUTE symlink", - buildLinkSource: "/home", - buildLinkDest: "/another/place", - buildRealPath: "/another/place/wagoodman", - linkOptions: []LinkResolutionOption{FollowBasenameLinks}, // a nop for this case (note the expected path and ref) - requestPath: "/home/wagoodman", - expectedExists: true, - expectedResolvedPath: "/another/place/wagoodman", - expectedRealRef: true, - }, - { - name: "request parent is ABSOLUTE symlink", - buildLinkSource: "/home", - buildLinkDest: "/another/place", - buildRealPath: "/another/place/wagoodman", - linkOptions: []LinkResolutionOption{}, // a nop for this case (note the expected path and ref) - requestPath: "/home/wagoodman", - expectedExists: true, - expectedResolvedPath: "/another/place/wagoodman", - expectedRealRef: true, - }, - - /////////////// - { - name: "request base is RELATIVE symlink", - 
buildLinkSource: "/home", - buildLinkDest: "../../another/place", - buildRealPath: "/another/place", - linkOptions: []LinkResolutionOption{FollowBasenameLinks}, - requestPath: "/home", - expectedExists: true, - expectedResolvedPath: "/another/place", - expectedRealRef: true, - }, - { - name: "request base is RELATIVE symlink", - buildLinkSource: "/home", - buildLinkDest: "../../another/place/wagoodman", - buildRealPath: "/another/place/wagoodman", - linkOptions: []LinkResolutionOption{}, - requestPath: "/home", - expectedExists: true, - // note that since the request matches the link source and we are NOT following, we get the link ref back - expectedResolvedPath: "/home", - expectedRealRef: false, - }, - /////////////// - { - name: "request parent is RELATIVE symlink", - buildLinkSource: "/home", - buildLinkDest: "../../another/place", - buildRealPath: "/another/place/wagoodman", - linkOptions: []LinkResolutionOption{FollowBasenameLinks}, // this is a nop since the parent is a link - requestPath: "/home/wagoodman", - expectedExists: true, - expectedResolvedPath: "/another/place/wagoodman", - expectedRealRef: true, - }, - { - name: "request parent is RELATIVE symlink", - buildLinkSource: "/home", - buildLinkDest: "../../another/place", - buildRealPath: "/another/place/wagoodman", - linkOptions: []LinkResolutionOption{}, // this is a nop since the parent is a link - requestPath: "/home/wagoodman", - expectedExists: true, - expectedResolvedPath: "/another/place/wagoodman", - expectedRealRef: true, - }, - /////////////// - { - name: "request base is DEAD symlink", - buildLinkSource: "/home", - buildLinkDest: "/mwahaha/i/go/to/nowhere", - linkOptions: []LinkResolutionOption{}, - requestPath: "/home", - // since we did not follow, the paths should exist to the symlink file - expectedResolvedPath: "/home", - expectedExists: true, - }, - { - name: "request base is DEAD symlink", - buildLinkSource: "/home", - buildLinkDest: "/mwahaha/i/go/to/nowhere", - linkOptions: 
[]LinkResolutionOption{FollowBasenameLinks}, - requestPath: "/home", - // we are following the path, which goes to nowhere.... the first failed path is resolved and returned - expectedResolvedPath: "/mwahaha", - expectedExists: false, - }, - { - name: "request base is DEAD symlink (which we don't follow)", - buildLinkSource: "/home", - buildLinkDest: "/mwahaha/i/go/to/nowhere", - linkOptions: []LinkResolutionOption{FollowBasenameLinks, DoNotFollowDeadBasenameLinks}, - requestPath: "/home", - // we are following the path, which goes to nowhere.... the first failed path is resolved and returned - expectedResolvedPath: "/home", - expectedExists: true, - }, - /////////////// - // trying to resolve to above root - { - name: "request parent is RELATIVE symlink to ABOVE root", - buildLinkSource: "/home", - buildLinkDest: "../../../../../../../../../../../../another/place", - buildRealPath: "/another/place/wagoodman", - linkOptions: []LinkResolutionOption{FollowBasenameLinks}, // this is a nop since the parent is a link - requestPath: "/home/wagoodman", - expectedExists: true, - expectedResolvedPath: "/another/place/wagoodman", - expectedRealRef: true, - }, - { - name: "request parent is RELATIVE symlink to ABOVE root", - buildLinkSource: "/home", - buildLinkDest: "../../../../../../../../../../../../another/place", - buildRealPath: "/another/place/wagoodman", - linkOptions: []LinkResolutionOption{}, // this is a nop since the parent is a link - requestPath: "/home/wagoodman", - expectedExists: true, - expectedResolvedPath: "/another/place/wagoodman", - expectedRealRef: true, - }, + //{ + // name: "request base is ABSOLUTE symlink", + // buildLinkSource: "/home", + // buildLinkDest: "/another/place", + // buildRealPath: "/another/place", + // linkOptions: []LinkResolutionOption{FollowBasenameLinks}, + // requestPath: "/home", + // // /another/place is the "real" reference that we followed, so we should expect the IDs to match upon lookup + // expectedRealRef: true, + // 
expectedExists: true, + // expected: &file.ReferenceVia{ + // Reference: &file.Reference{RealPath: "/another/place"}, + // RequestPath: "/home", + // LeafResolution: []file.ReferenceAccess{ + // { + // RequestPath: "/home", + // Reference: &file.Reference{RealPath: "/home"}, + // }, + // { + // RequestPath: "/another/place", + // Reference: &file.Reference{RealPath: "/another/place"}, + // }, + // }, + // }, + //}, + //{ + // name: "request base is ABSOLUTE symlink, request no link resolution", + // buildLinkSource: "/home", + // buildLinkDest: "/another/place", + // buildRealPath: "/another/place", + // linkOptions: []LinkResolutionOption{}, + // requestPath: "/home", + // // /home is just a symlink, not the real file (which is at /another/place)... and we've provided no symlink resolution + // expectedRealRef: false, + // expectedExists: true, + // expected: &file.ReferenceVia{ + // Reference: &file.Reference{RealPath: "/home"}, // this is the real symlink + // RequestPath: "/home", + // LeafResolution: nil, + // }, + //}, + // + ///////////////////// + //{ + // name: "request parent is ABSOLUTE symlink", + // buildLinkSource: "/home", + // buildLinkDest: "/another/place", + // buildRealPath: "/another/place/wagoodman", + // linkOptions: []LinkResolutionOption{FollowBasenameLinks}, // a nop for this case (note the expected path and ref) + // requestPath: "/home/wagoodman", + // expectedExists: true, + // expectedRealRef: true, + // expected: &file.ReferenceVia{ + // Reference: &file.Reference{RealPath: "/another/place/wagoodman"}, + // RequestPath: "/home/wagoodman", + // LinkResolution: file.LinkResolution{ + // AncestorResolution: []file.ReferenceAccess{ + // { + // RequestPath: "/home", + // Reference: &file.Reference{RealPath: "/home"}, + // }, + // { + // RequestPath: "/another/place", + // Reference: nil, + // }, + // }, + // LeafResolution: []file.ReferenceAccess{ + // { + // RequestPath: "/another/place/wagoodman", + // Reference: 
&file.Reference{RealPath: "/another/place/wagoodman"}, + // LinkResolution: file.LinkResolution{ + // AncestorResolution: []file.ReferenceAccess{ + // { + // RequestPath: "/home", + // Reference: &file.Reference{RealPath: "/home"}, // note: this was explicitly added in the tree + // }, + // { + // RequestPath: "/another/place", + // Reference: nil, + // }, + // }, + // }, + // }, + // }, + // }, + // }, + //}, + //{ + // name: "request parent is ABSOLUTE symlink", + // buildLinkSource: "/home", + // buildLinkDest: "/another/place", + // buildRealPath: "/another/place/wagoodman", + // linkOptions: []LinkResolutionOption{}, // a nop for this case (note the expected path and ref) + // requestPath: "/home/wagoodman", + // expectedExists: true, + // expectedResolvedPath: "/another/place/wagoodman", + // expectedRealRef: true, + // expectedAccessRequestPaths: []string{}, + // expectedAccessRealPaths: []string{}, + //}, + // + ///////////////// + //{ + // name: "request base is RELATIVE symlink", + // buildLinkSource: "/home", + // buildLinkDest: "../../another/place", + // buildRealPath: "/another/place", + // linkOptions: []LinkResolutionOption{FollowBasenameLinks}, + // requestPath: "/home", + // expectedExists: true, + // expectedResolvedPath: "/another/place", + // expectedRealRef: true, + // expectedAccessRequestPaths: []string{}, + // expectedAccessRealPaths: []string{}, + //}, + //{ + // name: "request base is RELATIVE symlink", + // buildLinkSource: "/home", + // buildLinkDest: "../../another/place/wagoodman", + // buildRealPath: "/another/place/wagoodman", + // linkOptions: []LinkResolutionOption{}, + // requestPath: "/home", + // expectedExists: true, + // // note that since the request matches the link source and we are NOT following, we get the link ref back + // expectedResolvedPath: "/home", + // expectedRealRef: false, + // expectedAccessRequestPaths: []string{}, + // expectedAccessRealPaths: []string{}, + //}, + ///////////////// + //{ + // name: "request 
parent is RELATIVE symlink", + // buildLinkSource: "/home", + // buildLinkDest: "../../another/place", + // buildRealPath: "/another/place/wagoodman", + // linkOptions: []LinkResolutionOption{FollowBasenameLinks}, // this is a nop since the parent is a link + // requestPath: "/home/wagoodman", + // expectedExists: true, + // expectedResolvedPath: "/another/place/wagoodman", + // expectedRealRef: true, + // expectedAccessRequestPaths: []string{}, + // expectedAccessRealPaths: []string{}, + //}, + //{ + // name: "request parent is RELATIVE symlink", + // buildLinkSource: "/home", + // buildLinkDest: "../../another/place", + // buildRealPath: "/another/place/wagoodman", + // linkOptions: []LinkResolutionOption{}, // this is a nop since the parent is a link + // requestPath: "/home/wagoodman", + // expectedExists: true, + // expectedResolvedPath: "/another/place/wagoodman", + // expectedRealRef: true, + // expectedAccessRequestPaths: []string{}, + // expectedAccessRealPaths: []string{}, + //}, + ///////////////// + //{ + // name: "request base is DEAD symlink", + // buildLinkSource: "/home", + // buildLinkDest: "/mwahaha/i/go/to/nowhere", + // linkOptions: []LinkResolutionOption{}, + // requestPath: "/home", + // // since we did not follow, the paths should exist to the symlink file + // expectedResolvedPath: "/home", + // expectedExists: true, + // expectedAccessRequestPaths: []string{}, + // expectedAccessRealPaths: []string{}, + //}, + //{ + // name: "request base is DEAD symlink", + // buildLinkSource: "/home", + // buildLinkDest: "/mwahaha/i/go/to/nowhere", + // linkOptions: []LinkResolutionOption{FollowBasenameLinks}, + // requestPath: "/home", + // // we are following the path, which goes to nowhere.... 
the first failed path is resolved and returned + // expectedResolvedPath: "/mwahaha", + // expectedExists: false, + // expectedAccessRequestPaths: []string{}, + // expectedAccessRealPaths: []string{}, + //}, + //{ + // name: "request base is DEAD symlink (which we don't follow)", + // buildLinkSource: "/home", + // buildLinkDest: "/mwahaha/i/go/to/nowhere", + // linkOptions: []LinkResolutionOption{FollowBasenameLinks, DoNotFollowDeadBasenameLinks}, + // requestPath: "/home", + // // we are following the path, which goes to nowhere.... the first failed path is resolved and returned + // expectedResolvedPath: "/home", + // expectedExists: true, + // expectedAccessRequestPaths: []string{}, + // expectedAccessRealPaths: []string{}, + //}, + ///////////////// + //// trying to resolve to above root + //{ + // name: "request parent is RELATIVE symlink to ABOVE root", + // buildLinkSource: "/home", + // buildLinkDest: "../../../../../../../../../../../../another/place", + // buildRealPath: "/another/place/wagoodman", + // linkOptions: []LinkResolutionOption{FollowBasenameLinks}, // this is a nop since the parent is a link + // requestPath: "/home/wagoodman", + // expectedExists: true, + // expectedResolvedPath: "/another/place/wagoodman", + // expectedRealRef: true, + // expectedAccessRequestPaths: []string{}, + // expectedAccessRealPaths: []string{}, + //}, + //{ + // name: "request parent is RELATIVE symlink to ABOVE root", + // buildLinkSource: "/home", + // buildLinkDest: "../../../../../../../../../../../../another/place", + // buildRealPath: "/another/place/wagoodman", + // linkOptions: []LinkResolutionOption{}, // this is a nop since the parent is a link + // requestPath: "/home/wagoodman", + // expectedExists: true, + // expectedResolvedPath: "/another/place/wagoodman", + // expectedRealRef: true, + // expectedAccessRequestPaths: []string{}, + // expectedAccessRealPaths: []string{}, + //}, } for _, test := range tests { @@ -700,22 +840,17 @@ func 
TestFileTree_File_Symlink(t *testing.T) { t.Fatalf("expected path to exist, but does NOT") } - // validate ref... - if realRef != nil && ref != nil { - // validate path... - if ref.RealPath != test.expectedResolvedPath { - t.Fatalf("unexpected path difference: %+v != %v", ref.RealPath, test.expectedResolvedPath) - } + // validate the resolved reference against the real reference added to the tree + if ref.ID() == realRef.ID() && !test.expectedRealRef { + t.Errorf("refs should not be the same: resolve(%+v) == real(%+v)", ref, realRef) + } else if ref.ID() != realRef.ID() && test.expectedRealRef { + t.Errorf("refs should be the same: resolve(%+v) != real(%+v)", ref, realRef) + } - if ref.ID() == realRef.ID() && !test.expectedRealRef { - t.Errorf("refs should not be the same: resolve(%+v) == reaal(%+v)", ref, realRef) - } else if ref.ID() != realRef.ID() && test.expectedRealRef { - t.Errorf("refs should be the same: resolve(%+v) != real(%+v)", ref, realRef) - } - } else { - if test.expectedRealRef { - t.Fatalf("expected to test a real reference, but could not") - } + // compare the remaining expectations, ignoring any reference IDs + ignoreIDs := cmpopts.IgnoreUnexported(file.Reference{}) + if d := cmp.Diff(test.expected, ref, ignoreIDs); d != "" { + t.Errorf("unexpected file reference (-want +got):\n%s", d) } }) } @@ -816,30 +951,26 @@ func TestFileTree_AllFiles(t *testing.T) { for _, p := range paths { _, err := tr.AddFile(file.Path(p)) - if err != nil { - t.Fatalf("failed to add path ('%s'): %+v", p, err) - } + require.NoError(t, err) } var err error + var f *file.Reference // dir - _, err = tr.AddDir("/home") - if err != nil { - t.Fatalf("could not setup dir: %+v", err) - } + f, err = tr.AddDir("/home") + require.NotNil(t, f) + require.NoError(t, err) // relative symlink - _, err = tr.AddSymLink("/home/symlink", "../../../sym-linked-dest") - if err != nil { - t.Fatalf("could not setup link: %+v", err) - } + f, err = tr.AddSymLink("/home/symlink", 
"../../../sym-linked-dest") + require.NotNil(t, f) + require.NoError(t, err) // hardlink - _, err = tr.AddHardLink("/home/hardlink", "/hard-linked-dest") - if err != nil { - t.Fatalf("could not setup link: %+v", err) - } + f, err = tr.AddHardLink("/home/hardlink", "/hard-linked-dest") + require.NotNil(t, f) + require.NoError(t, err) tests := []struct { name string diff --git a/pkg/filetree/glob.go b/pkg/filetree/glob.go index 0a650c6f..72611c80 100644 --- a/pkg/filetree/glob.go +++ b/pkg/filetree/glob.go @@ -59,7 +59,7 @@ func isInPathResolutionLoop(path string, ft *FileTree) (bool, error) { if err != nil { return false, err } - allPathSet.Add(file.Path(fn.ID())) + allPathSet.Add(file.Path(fn.FileNode.ID())) } // we want to allow for getting children out of the first iteration of a infinite path, but NOT allowing // beyond the second iteration down an infinite path. @@ -109,7 +109,7 @@ func (f *fileAdapter) ReadDir(n int) ([]fs.DirEntry, error) { return ret, err } - for idx, child := range f.filetree.tree.Children(fn) { + for idx, child := range f.filetree.tree.Children(fn.FileNode) { if idx == n && n != -1 { break } @@ -148,7 +148,7 @@ func (a *osAdapter) ReadDir(name string) ([]fs.DirEntry, error) { return ret, err } - for _, child := range a.filetree.tree.Children(fn) { + for _, child := range a.filetree.tree.Children(fn.FileNode) { requestPath := path.Join(name, filepath.Base(string(child.ID()))) r, err := a.Lstat(requestPath) if err == nil { @@ -173,13 +173,13 @@ func (a *osAdapter) Lstat(name string) (fs.FileInfo, error) { if err != nil { return &fileinfoAdapter{}, err } - if fn == nil { + if fn == nil || fn.FileNode == nil { return &fileinfoAdapter{}, os.ErrNotExist } return &fileinfoAdapter{ VirtualPath: file.Path(name), - Node: *fn, + Node: *fn.FileNode, }, nil } @@ -202,12 +202,12 @@ func (a *osAdapter) Stat(name string) (fs.FileInfo, error) { if err != nil { return &fileinfoAdapter{}, err } - if fn == nil { + if fn == nil || fn.FileNode == nil { return 
&fileinfoAdapter{}, os.ErrNotExist } return &fileinfoAdapter{ VirtualPath: file.Path(name), - Node: *fn, + Node: *fn.FileNode, }, nil } diff --git a/pkg/filetree/union_filetree_test.go b/pkg/filetree/union_filetree_test.go index caeacc14..594252e1 100644 --- a/pkg/filetree/union_filetree_test.go +++ b/pkg/filetree/union_filetree_test.go @@ -65,7 +65,7 @@ func TestUnionFileTree_Squash(t *testing.T) { } _, f, _ = base.File("/home/wagoodman/more") - if f == nil { + if f == nil || f.Reference == nil { t.Fatal("base was never created") } @@ -74,17 +74,17 @@ func TestUnionFileTree_Squash(t *testing.T) { } _, f, _ = top.File("/home/wagoodman/more") - if f != nil { + if f.Reference != nil { t.Fatal("top file should have been implicitly nil but wasn't") } _, f, _ = squashed.File("/home/wagoodman/more") - if f != nil { + if f.Reference != nil { t.Fatal("file override to a dir has original properties") } _, f, _ = squashed.File("/home/wagoodman/moredir") - if f == nil { + if f == nil || f.Reference == nil { t.Fatal("dir override to a dir is missing original properties") } if originalMoreDir.ID() != f.ID() { diff --git a/pkg/image/content_helpers.go b/pkg/image/content_helpers.go index 01ac77ad..b40f406f 100644 --- a/pkg/image/content_helpers.go +++ b/pkg/image/content_helpers.go @@ -11,15 +11,15 @@ import ( // fetchFileContentsByPath is a common helper function for resolving the file contents for a path from the file // catalog relative to the given tree. 
func fetchFileContentsByPath(ft *filetree.FileTree, fileCatalog *FileCatalog, path file.Path) (io.ReadCloser, error) { - exists, fileReference, err := ft.File(path, filetree.FollowBasenameLinks) + exists, refVia, err := ft.File(path, filetree.FollowBasenameLinks) if err != nil { return nil, err } - if !exists && fileReference == nil { + if refVia == nil || refVia.Reference == nil { return nil, fmt.Errorf("could not find file path in Tree: %s", path) } - reader, err := fileCatalog.FileContents(*fileReference) + reader, err := fileCatalog.FileContents(*refVia.Reference) if err != nil { return nil, err } @@ -28,51 +28,52 @@ func fetchFileContentsByPath(ft *filetree.FileTree, fileCatalog *FileCatalog, pa // fetchFileContentsByPath is a common helper function for resolving file references for a MIME type from the file // catalog relative to the given tree. -func fetchFilesByMIMEType(ft *filetree.FileTree, fileCatalog *FileCatalog, mType string) ([]file.Reference, error) { +func fetchFilesByMIMEType(ft *filetree.FileTree, fileCatalog *FileCatalog, mType string) ([]file.ReferenceVia, error) { fileEntries, err := fileCatalog.GetByMIMEType(mType) if err != nil { return nil, fmt.Errorf("unable to fetch file references by MIME type (%q): %w", mType, err) } // since this query is related to the contents of the path, this should be a strict file ID match - return filterCatalogFilesRelativesToTree(ft, fileEntries, true, filetree.FollowBasenameLinks) + return filterCatalogFilesRelativesToTree(ft, fileEntries, filetree.FollowBasenameLinks) } // fetchFilesByExtension is a common helper function for resolving file references for a file extension from the file // catalog relative to the given tree.
-func fetchFilesByExtension(ft *filetree.FileTree, fileCatalog *FileCatalog, extension string) ([]file.Reference, error) { +func fetchFilesByExtension(ft *filetree.FileTree, fileCatalog *FileCatalog, extension string) ([]file.ReferenceVia, error) { fileEntries, err := fileCatalog.GetByExtension(extension) if err != nil { return nil, fmt.Errorf("unable to fetch file references by extension (%q): %w", extension, err) } - return filterCatalogFilesRelativesToTree(ft, fileEntries, false, filetree.FollowBasenameLinks) + return filterCatalogFilesRelativesToTree(ft, fileEntries, filetree.FollowBasenameLinks) } // fetchFilesByBasename is a common helper function for resolving file references for a file basename // catalog relative to the given tree. -func fetchFilesByBasename(ft *filetree.FileTree, fileCatalog *FileCatalog, basename string) ([]file.Reference, error) { +func fetchFilesByBasename(ft *filetree.FileTree, fileCatalog *FileCatalog, basename string) ([]file.ReferenceVia, error) { fileEntries, err := fileCatalog.GetByBasename(basename) if err != nil { return nil, fmt.Errorf("unable to fetch file references by basename (%q): %w", basename, err) } - return filterCatalogFilesRelativesToTree(ft, fileEntries, false, filetree.FollowBasenameLinks) + return filterCatalogFilesRelativesToTree(ft, fileEntries, filetree.FollowBasenameLinks) } // fetchFilesByBasenameGlob is a common helper function for resolving file references for a file basename glob pattern // catalog relative to the given tree. -func fetchFilesByBasenameGlob(ft *filetree.FileTree, fileCatalog *FileCatalog, basenameGlob string) ([]file.Reference, error) { - fileEntries, err := fileCatalog.GetByBasenameGlob(basenameGlob) +func fetchFilesByBasenameGlob(ft *filetree.FileTree, fileCatalog *FileCatalog, basenameGlobs ...string) ([]file.ReferenceVia, error) { + fileEntries, err := fileCatalog.GetByBasenameGlob(basenameGlobs...) 
if err != nil { - return nil, fmt.Errorf("unable to fetch file references by basename glob (%q): %w", basenameGlob, err) + return nil, fmt.Errorf("unable to fetch file references by basename glob (%q): %w", basenameGlobs, err) } - return filterCatalogFilesRelativesToTree(ft, fileEntries, false, filetree.FollowBasenameLinks) + return filterCatalogFilesRelativesToTree(ft, fileEntries, filetree.FollowBasenameLinks) } -func filterCatalogFilesRelativesToTree(ft *filetree.FileTree, fileEntries []FileCatalogEntry, strictFileID bool, linkResolutionOpts ...filetree.LinkResolutionOption) ([]file.Reference, error) { - var refs []file.Reference +func filterCatalogFilesRelativesToTree(ft *filetree.FileTree, fileEntries []FileCatalogEntry, linkResolutionOpts ...filetree.LinkResolutionOption) ([]file.ReferenceVia, error) { + var refs []file.ReferenceVia +allFileEntries: for _, entry := range fileEntries { _, ref, err := ft.File(entry.File.RealPath, linkResolutionOpts...) if err != nil { @@ -83,12 +84,15 @@ func filterCatalogFilesRelativesToTree(ft *filetree.FileTree, fileEntries []File continue } - if strictFileID && ref.ID() != entry.File.ID() { - continue + for _, accessRef := range ref.AccessReferences() { + if accessRef.ID() == entry.File.ID() { + // we know this entry exists in the tree, keep track of the reference for this file + refs = append(refs, *ref) + continue allFileEntries + } } - // we know this entry exists in the tree, keep track of the reference for this file - refs = append(refs, *ref) + // we did not find a matching file ID in the tree, so drop this entry } return refs, nil } diff --git a/pkg/image/file_catalog.go b/pkg/image/file_catalog.go index 4ef8d61b..dd8a5ed3 100644 --- a/pkg/image/file_catalog.go +++ b/pkg/image/file_catalog.go @@ -2,8 +2,10 @@ package image import ( "fmt" + "github.com/scylladb/go-set/strset" "io" "path" + "sort" "strings" "sync" @@ -22,7 +24,7 @@ type FileCatalog struct { byMIMEType map[string][]file.ID byExtension 
map[string][]file.ID byBasename map[string][]file.ID - basenames []string + basenames *strset.Set } // FileCatalogEntry represents all stored metadata for a single file reference. @@ -40,6 +42,7 @@ func NewFileCatalog() FileCatalog { byMIMEType: make(map[string][]file.ID), byExtension: make(map[string][]file.ID), byBasename: make(map[string][]file.ID), + basenames: strset.New(), } } @@ -58,10 +61,12 @@ func (c *FileCatalog) Add(f file.Reference, m file.Metadata, l *Layer, opener fi basename := path.Base(string(f.RealPath)) c.byBasename[basename] = append(c.byBasename[basename], id) - c.basenames = append(c.basenames, basename) + c.basenames.Add(basename) + //fmt.Println("Adding file to catalog: ", f.RealPath, " (", id, ")") for _, ext := range fileExtensions(string(f.RealPath)) { c.byExtension[ext] = append(c.byExtension[ext], id) + //fmt.Println(" Extensions ("+ext+"): ", c.byExtension[ext]) } c.catalog[id] = FileCatalogEntry{ @@ -96,7 +101,9 @@ func (c *FileCatalog) Basenames() []string { c.RLock() defer c.RUnlock() - return c.basenames + bns := c.basenames.List() + sort.Strings(bns) + return bns } func (c *FileCatalog) GetByMIMEType(mType string) ([]FileCatalogEntry, error) { @@ -166,27 +173,31 @@ func (c *FileCatalog) GetByBasename(basename string) ([]FileCatalogEntry, error) return entries, nil } -func (c *FileCatalog) GetByBasenameGlob(glob string) ([]FileCatalogEntry, error) { +func (c *FileCatalog) GetByBasenameGlob(globs ...string) ([]FileCatalogEntry, error) { c.RLock() defer c.RUnlock() - if strings.Contains(glob, "**") { - return nil, fmt.Errorf("basename glob patterns with '**' are not supported") - } - if strings.Contains(glob, "/") { - return nil, fmt.Errorf("found directory separator in a basename") - } + var fileEntries []FileCatalogEntry + basenames := c.Basenames() - patternObj := wildmatch.NewWildMatch(glob) + for _, glob := range globs { + if strings.Contains(glob, "**") { + return nil, fmt.Errorf("basename glob patterns with '**' are not 
supported") + } + if strings.Contains(glob, "/") { + return nil, fmt.Errorf("found directory separator in a basename") + } - var fileEntries []FileCatalogEntry - for _, b := range c.Basenames() { - if patternObj.IsMatch(b) { - bns, err := c.GetByBasename(b) - if err != nil { - return nil, fmt.Errorf("unable to fetch file references by basename (%q): %w", b, err) + patternObj := wildmatch.NewWildMatch(glob) + + for _, b := range basenames { + if patternObj.IsMatch(b) { + bns, err := c.GetByBasename(b) + if err != nil { + return nil, fmt.Errorf("unable to fetch file references by basename (%q): %w", b, err) + } + fileEntries = append(fileEntries, bns...) } - fileEntries = append(fileEntries, bns...) } } diff --git a/pkg/image/file_catalog_test.go b/pkg/image/file_catalog_test.go index a22614d1..75061150 100644 --- a/pkg/image/file_catalog_test.go +++ b/pkg/image/file_catalog_test.go @@ -502,15 +502,6 @@ func TestFileCatalog_GetByBasenameGlob(t *testing.T) { name: "get existing file name", input: "file-1.*", want: []FileCatalogEntry{ - { - File: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, - Metadata: file.Metadata{ - Path: "/path/branch.d/one/file-1.txt", - TarHeaderName: "path/branch.d/one/file-1.txt", - TypeFlag: 48, - MIMEType: "text/plain", - }, - }, { File: file.Reference{RealPath: "/path/common/file-1.d"}, Metadata: file.Metadata{ @@ -520,6 +511,15 @@ func TestFileCatalog_GetByBasenameGlob(t *testing.T) { TypeFlag: 50, }, }, + { + File: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-1.txt", + TarHeaderName: "path/branch.d/one/file-1.txt", + TypeFlag: 48, + MIMEType: "text/plain", + }, + }, }, }, { @@ -531,26 +531,6 @@ func TestFileCatalog_GetByBasenameGlob(t *testing.T) { name: "get directory name", input: "bran*.d", want: []FileCatalogEntry{ - // below is the unique behavior to this function... 
- { - File: file.Reference{RealPath: "/path/branch.d"}, - Metadata: file.Metadata{ - Path: "/path/branch.d", - TarHeaderName: "path/branch.d/", - TypeFlag: 53, - IsDir: true, - }, - }, - { - File: file.Reference{RealPath: "/path/common/branch.d"}, - Metadata: file.Metadata{ - Path: "/path/common/branch.d", - TarHeaderName: "path/common/branch.d", - Linkname: "path/branch.d", - TypeFlag: 50, - }, - }, - // below is the same as ByBasename() { File: file.Reference{RealPath: "/path/branch.d"}, Metadata: file.Metadata{ @@ -744,7 +724,6 @@ func TestFileCatalog_GetBasenames(t *testing.T) { ".file-4.tar.gz", "branch", "branch.d", - "branch.d", "common", "file-1.d", "file-1.txt", diff --git a/pkg/image/image.go b/pkg/image/image.go index 8d7400c5..031f72c0 100644 --- a/pkg/image/image.go +++ b/pkg/image/image.go @@ -268,8 +268,8 @@ func (i *Image) FileContentsFromSquash(path file.Path) (io.ReadCloser, error) { } // FilesByMIMETypeFromSquash returns file references for files that match at least one of the given MIME types. -func (i *Image) FilesByMIMETypeFromSquash(mimeTypes ...string) ([]file.Reference, error) { - var refs []file.Reference +func (i *Image) FilesByMIMETypeFromSquash(mimeTypes ...string) ([]file.ReferenceVia, error) { + var refs []file.ReferenceVia for _, ty := range mimeTypes { refsForType, err := fetchFilesByMIMEType(i.SquashedTree(), &i.FileCatalog, ty) if err != nil { @@ -281,18 +281,18 @@ func (i *Image) FilesByMIMETypeFromSquash(mimeTypes ...string) ([]file.Reference } // FilesByExtensionFromSquash returns file references for files that have the given extension relative to the squash tree. 
-func (i *Image) FilesByExtensionFromSquash(extension string) ([]file.Reference, error) { +func (i *Image) FilesByExtensionFromSquash(extension string) ([]file.ReferenceVia, error) { return fetchFilesByExtension(i.SquashedTree(), &i.FileCatalog, extension) } // FilesByBasenameFromSquash returns file references for files with the given basename relative to the squash tree. -func (i *Image) FilesByBasenameFromSquash(basename string) ([]file.Reference, error) { +func (i *Image) FilesByBasenameFromSquash(basename string) ([]file.ReferenceVia, error) { return fetchFilesByBasename(i.SquashedTree(), &i.FileCatalog, basename) } // FilesByBasenameGlobFromSquash returns file references for files with the given basename glob pattern relative to the squash tree. -func (i *Image) FilesByBasenameGlobFromSquash(glob string) ([]file.Reference, error) { - return fetchFilesByBasenameGlob(i.SquashedTree(), &i.FileCatalog, glob) +func (i *Image) FilesByBasenameGlobFromSquash(globs ...string) ([]file.ReferenceVia, error) { + return fetchFilesByBasenameGlob(i.SquashedTree(), &i.FileCatalog, globs...) } // FileContentsByRef fetches file contents for a single file reference, regardless of the source layer. @@ -304,7 +304,7 @@ func (i *Image) FileContentsByRef(ref file.Reference) (io.ReadCloser, error) { // ResolveLinkByLayerSquash resolves a symlink or hardlink for the given file reference relative to the result from // the layer squash of the given layer index argument. // If the given file reference is not a link type, or is a unresolvable (dead) link, then the given file reference is returned. -func (i *Image) ResolveLinkByLayerSquash(ref file.Reference, layer int, options ...filetree.LinkResolutionOption) (*file.Reference, error) { +func (i *Image) ResolveLinkByLayerSquash(ref file.Reference, layer int, options ...filetree.LinkResolutionOption) (*file.ReferenceVia, error) { allOptions := append([]filetree.LinkResolutionOption{filetree.FollowBasenameLinks}, options...) 
_, resolvedRef, err := i.Layers[layer].SquashedTree.File(ref.RealPath, allOptions...) return resolvedRef, err @@ -312,7 +312,7 @@ func (i *Image) ResolveLinkByLayerSquash(ref file.Reference, layer int, options // ResolveLinkByImageSquash resolves a symlink or hardlink for the given file reference relative to the result from the image squash. // If the given file reference is not a link type, or is a unresolvable (dead) link, then the given file reference is returned. -func (i *Image) ResolveLinkByImageSquash(ref file.Reference, options ...filetree.LinkResolutionOption) (*file.Reference, error) { +func (i *Image) ResolveLinkByImageSquash(ref file.Reference, options ...filetree.LinkResolutionOption) (*file.ReferenceVia, error) { allOptions := append([]filetree.LinkResolutionOption{filetree.FollowBasenameLinks}, options...) _, resolvedRef, err := i.Layers[len(i.Layers)-1].SquashedTree.File(ref.RealPath, allOptions...) return resolvedRef, err diff --git a/pkg/image/layer.go b/pkg/image/layer.go index 99ebba45..815a2386 100644 --- a/pkg/image/layer.go +++ b/pkg/image/layer.go @@ -155,8 +155,8 @@ func (l *Layer) FileContentsFromSquash(path file.Path) (io.ReadCloser, error) { } // FilesByMIMEType returns file references for files that match at least one of the given MIME types relative to each layer tree. -func (l *Layer) FilesByMIMEType(mimeTypes ...string) ([]file.Reference, error) { - var refs []file.Reference +func (l *Layer) FilesByMIMEType(mimeTypes ...string) ([]file.ReferenceVia, error) { + var refs []file.ReferenceVia for _, ty := range mimeTypes { refsForType, err := fetchFilesByMIMEType(l.Tree, l.fileCatalog, ty) if err != nil { @@ -168,8 +168,8 @@ func (l *Layer) FilesByMIMEType(mimeTypes ...string) ([]file.Reference, error) { } // FilesByMIMETypeFromSquash returns file references for files that match at least one of the given MIME types relative to the squashed file tree representation. 
-func (l *Layer) FilesByMIMETypeFromSquash(mimeTypes ...string) ([]file.Reference, error) { - var refs []file.Reference +func (l *Layer) FilesByMIMETypeFromSquash(mimeTypes ...string) ([]file.ReferenceVia, error) { + var refs []file.ReferenceVia for _, ty := range mimeTypes { refsForType, err := fetchFilesByMIMEType(l.SquashedTree, l.fileCatalog, ty) if err != nil { @@ -181,32 +181,32 @@ func (l *Layer) FilesByMIMETypeFromSquash(mimeTypes ...string) ([]file.Reference } // FilesByExtension returns file references for files that have the given extension. -func (l *Layer) FilesByExtension(extension string) ([]file.Reference, error) { +func (l *Layer) FilesByExtension(extension string) ([]file.ReferenceVia, error) { return fetchFilesByExtension(l.Tree, l.fileCatalog, extension) } // FilesByExtensionFromSquash returns file references for files have the given extension relative to the squash tree. -func (l *Layer) FilesByExtensionFromSquash(extension string) ([]file.Reference, error) { +func (l *Layer) FilesByExtensionFromSquash(extension string) ([]file.ReferenceVia, error) { return fetchFilesByExtension(l.SquashedTree, l.fileCatalog, extension) } // FilesByBasename returns file references for files that have the following basename. -func (l *Layer) FilesByBasename(basename string) ([]file.Reference, error) { +func (l *Layer) FilesByBasename(basename string) ([]file.ReferenceVia, error) { return fetchFilesByBasename(l.Tree, l.fileCatalog, basename) } // FilesByBasenameFromSquash returns file references for files by name relative to the squash tree. -func (l *Layer) FilesByBasenameFromSquash(extension string) ([]file.Reference, error) { +func (l *Layer) FilesByBasenameFromSquash(extension string) ([]file.ReferenceVia, error) { return fetchFilesByBasename(l.SquashedTree, l.fileCatalog, extension) } // FilesByBasenameGlob returns file references for files that have the following basename glob. 
-func (l *Layer) FilesByBasenameGlob(glob string) ([]file.Reference, error) { +func (l *Layer) FilesByBasenameGlob(glob string) ([]file.ReferenceVia, error) { return fetchFilesByBasenameGlob(l.Tree, l.fileCatalog, glob) } // FilesByBasenameGlobFromSquash returns file references for files by basename glob pattern relative to the squash tree. -func (l *Layer) FilesByBasenameGlobFromSquash(glob string) ([]file.Reference, error) { +func (l *Layer) FilesByBasenameGlobFromSquash(glob string) ([]file.ReferenceVia, error) { return fetchFilesByBasenameGlob(l.SquashedTree, l.fileCatalog, glob) } From 21001f1d6d0fc9dfb5bffe9065453ad19f115cf9 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Fri, 27 Jan 2023 17:29:21 -0500 Subject: [PATCH 03/35] add leaf link resolution on tree responses (defer ancestor link resolution) Signed-off-by: Alex Goodman --- pkg/file/reference.go | 95 +-- pkg/file/reference_test.go | 297 +++++++++ pkg/filetree/depth_first_path_walker.go | 12 +- pkg/filetree/filetree.go | 121 ++-- pkg/filetree/filetree_test.go | 599 +++++++++++------- pkg/image/content_helpers.go | 16 +- pkg/image/file_catalog.go | 7 +- pkg/image/image.go | 14 +- pkg/image/layer.go | 20 +- .../fixture_image_opaque_directory_test.go | 2 +- .../fixture_image_symlinks_test.go | 4 +- 11 files changed, 831 insertions(+), 356 deletions(-) create mode 100644 pkg/file/reference_test.go diff --git a/pkg/file/reference.go b/pkg/file/reference.go index d200dd77..a2c0810c 100644 --- a/pkg/file/reference.go +++ b/pkg/file/reference.go @@ -9,54 +9,80 @@ var nextID = 0 // ID is used for file tree manipulation to uniquely identify tree nodes. type ID uint64 -type LinkResolution struct { - AncestorResolution []ReferenceAccess - LeafResolution []ReferenceAccess -} - -// ReferenceAccess represents the fetching of a file reference via a (possibly different) path. +// ReferenceAccess represents the fetching of a possibly non-existent file, and how it was accessed. 
type ReferenceAccess struct { RequestPath Path *Reference } -// ReferenceVia represents a unique file, and how it was accessed, showing full symlink resolution. -type ReferenceVia struct { +// ReferenceAccessVia represents a possibly non-existent file, and how it was accessed, including all symlink and hardlink resolution. +type ReferenceAccessVia struct { ReferenceAccess - LinkResolution + LeafLinkResolution []ReferenceAccess +} + +func (f *ReferenceAccessVia) HasReference() bool { + if f == nil { + return false + } + return f.Reference != nil } -// RequestPaths represents the traversal through the filesystem to access to current reference, including all symlink and hardlink resolution. -func (f *ReferenceVia) RequestPaths() []Path { - //paths := []Path{f.RequestPath} +// RequestResolutionPath represents the traversal through the filesystem to access to current reference, including all symlink and hardlink resolution. +func (f *ReferenceAccessVia) RequestResolutionPath() []Path { var paths []Path - for _, p := range f.LeafResolution { + var firstPath Path + var lastLinkResolutionIsDead bool + + if string(f.RequestPath) != "" { + firstPath = f.RequestPath + paths = append(paths, f.RequestPath) + } + for i, p := range f.LeafLinkResolution { + if i == 0 && p.RequestPath == f.RequestPath { + // ignore link resolution that starts with the same user requested path + continue + } + if firstPath == "" { + firstPath = p.RequestPath + } + paths = append(paths, p.RequestPath) + + if i == len(f.LeafLinkResolution)-1 { + // we've reached the final link resolution + if p.Reference == nil { + lastLinkResolutionIsDead = true + } + } + } + if f.HasReference() && firstPath != f.Reference.RealPath && !lastLinkResolutionIsDead { + // we've reached the final reference that was resolved + // we should only do this if there was a link resolution + paths = append(paths, f.Reference.RealPath) } return paths } -// AccessReferences represents the traversal through the filesystem to access 
to current reference, including all symlink and hardlink resolution. -func (f *ReferenceVia) AccessReferences() []*Reference { - var refs []*Reference - for _, p := range f.LeafResolution { - refs = append(refs, p.Reference) - } - //refs = append(refs, f.Reference) - return refs -} +// ResolutionReferences represents the traversal through the filesystem to access to current reference, including all symlink and hardlink resolution. +func (f *ReferenceAccessVia) ResolutionReferences() []Reference { + var refs []Reference + var lastLinkResolutionIsDead bool -// RealPaths represents the traversal through the filesystem to access to current reference, including all symlink and hardlink resolution. -func (f *ReferenceVia) RealPaths() []Path { - var refs []Path - for _, p := range f.LeafResolution { + for i, p := range f.LeafLinkResolution { if p.Reference != nil { - refs = append(refs, p.Reference.RealPath) + refs = append(refs, *p.Reference) + } + if i == len(f.LeafLinkResolution)-1 { + // we've reached the final link resolution + if p.Reference == nil { + lastLinkResolutionIsDead = true + } } } - //if f.Reference != nil { - // refs = append(refs, f.Reference.RealPath) - //} + if f.Reference != nil && !lastLinkResolutionIsDead { + refs = append(refs, *f.Reference) + } return refs } @@ -67,16 +93,13 @@ type Reference struct { } // NewFileReferenceVia shows how a reference was accessed. 
-func NewFileReferenceVia(path Path, ref *Reference, ancestors []ReferenceAccess, leafs []ReferenceAccess) *ReferenceVia { - return &ReferenceVia{ +func NewFileReferenceVia(path Path, ref *Reference, leafs []ReferenceAccess) *ReferenceAccessVia { + return &ReferenceAccessVia{ ReferenceAccess: ReferenceAccess{ RequestPath: path, Reference: ref, }, - LinkResolution: LinkResolution{ - AncestorResolution: ancestors, - LeafResolution: leafs, - }, + LeafLinkResolution: leafs, } } diff --git a/pkg/file/reference_test.go b/pkg/file/reference_test.go new file mode 100644 index 00000000..1d9b000b --- /dev/null +++ b/pkg/file/reference_test.go @@ -0,0 +1,297 @@ +package file + +import ( + "github.com/stretchr/testify/assert" + "testing" +) + +func TestReferenceAccessVia_RequestPaths(t *testing.T) { + tests := []struct { + name string + subject ReferenceAccessVia + want []Path + }{ + { + name: "empty", + subject: ReferenceAccessVia{ + ReferenceAccess: ReferenceAccess{}, + LeafLinkResolution: nil, + }, + want: nil, + }, + { + name: "single ref", + subject: ReferenceAccessVia{ + ReferenceAccess: ReferenceAccess{ + RequestPath: "/home/wagoodman/file.txt", + Reference: &Reference{ + id: 1, + RealPath: "/home/wagoodman/file.txt", + }, + }, + LeafLinkResolution: nil, + }, + want: []Path{ + "/home/wagoodman/file.txt", + }, + }, + { + // /home -> /another/place + name: "ref with 1 leaf link resolutions", + subject: ReferenceAccessVia{ + ReferenceAccess: ReferenceAccess{ + RequestPath: "/home", + Reference: &Reference{RealPath: "/another/place"}, + }, + LeafLinkResolution: []ReferenceAccess{ + { + RequestPath: "/home", + Reference: &Reference{RealPath: "/home"}, + }, + }, + }, + want: []Path{ + "/home", + "/another/place", + }, + }, + { + // /home/wagoodman/file.txt -> /place/wagoodman/file.txt -> /1/file.txt -> /2/real-file.txt + + // this is the current state of the filetree + // . 
+ // ├── 1 + // │ ├── file.txt -> 2/real-file.txt + // │ └── link-to-place -> place + // ├── 2 + // │ └── real-file.txt + // ├── home -> link-to-1/link-to-place + // ├── link-to-1 -> 1 + // └── place + // └── wagoodman + // └── file.txt -> link-to-1/file.txt + + name: "ref with 2 leaf link resolutions", + subject: ReferenceAccessVia{ + ReferenceAccess: ReferenceAccess{ + RequestPath: "/home/wagoodman/file.txt", + Reference: &Reference{RealPath: "/2/real-file.txt"}, + }, + LeafLinkResolution: []ReferenceAccess{ + { + RequestPath: "/place/wagoodman/file.txt", + Reference: &Reference{RealPath: "/place/wagoodman/file.txt"}, + }, + { + RequestPath: "/1/file.txt", + Reference: &Reference{RealPath: "/1/file.txt"}, + }, + }, + }, + want: []Path{ + "/home/wagoodman/file.txt", // request + "/place/wagoodman/file.txt", // real intermediate path + "/1/file.txt", // real intermediate path + "/2/real-file.txt", // final resolved path on the reference + }, + }, + { + // /home/wagoodman/file.txt -> /place/wagoodman/file.txt -> /1/file.txt -> /2/real-file.txt + + // this is the current state of the filetree + // . 
+ // ├── 1 + // │ ├── file.txt -> 2/real-file.txt + // │ └── link-to-place -> place + // ├── home -> link-to-1/link-to-place + // ├── link-to-1 -> 1 + // └── place + // └── wagoodman + // └── file.txt -> link-to-1/file.txt + + name: "ref with dead link", + subject: ReferenceAccessVia{ + ReferenceAccess: ReferenceAccess{ + RequestPath: "/home/wagoodman/file.txt", + // note: this falls back to the last path that exists which is the behavior for link resolution options: + // []LinkResolutionOption{FollowBasenameLinks, DoNotFollowDeadBasenameLinks} + Reference: &Reference{RealPath: "/1/file.txt"}, + }, + LeafLinkResolution: []ReferenceAccess{ + { + RequestPath: "/place/wagoodman/file.txt", + Reference: &Reference{RealPath: "/place/wagoodman/file.txt"}, + }, + { + RequestPath: "/1/file.txt", + Reference: &Reference{RealPath: "/1/file.txt"}, + }, + { + RequestPath: "/2/real-file.txt", + // nope! it's dead! + //Reference: &file.Reference{RealPath: "/2/real-file.txt"}, + }, + }, + }, + want: []Path{ + "/home/wagoodman/file.txt", // request + "/place/wagoodman/file.txt", // real intermediate path + "/1/file.txt", // real intermediate path + "/2/real-file.txt", // final resolved path on the reference (that does not exist) + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equalf(t, tt.want, tt.subject.RequestResolutionPath(), "RequestResolutionPath()") + }) + } +} + +func TestReferenceAccessVia_AccessReferences(t *testing.T) { + type fields struct { + ReferenceAccess ReferenceAccess + LeafLinkResolution []ReferenceAccess + } + tests := []struct { + name string + subject ReferenceAccessVia + want []Reference + }{ + { + name: "empty", + subject: ReferenceAccessVia{ + ReferenceAccess: ReferenceAccess{}, + LeafLinkResolution: nil, + }, + want: nil, + }, + { + name: "single ref", + subject: ReferenceAccessVia{ + ReferenceAccess: ReferenceAccess{ + RequestPath: "/home/wagoodman/file.txt", + Reference: &Reference{ + id: 1, + RealPath: 
"/home/wagoodman/file.txt", + }, + }, + LeafLinkResolution: nil, + }, + want: []Reference{ + { + id: 1, + RealPath: "/home/wagoodman/file.txt", + }, + }, + }, + { + // /home -> /another/place + name: "ref with 1 leaf link resolutions", + subject: ReferenceAccessVia{ + ReferenceAccess: ReferenceAccess{ + RequestPath: "/home", + Reference: &Reference{RealPath: "/another/place"}, + }, + LeafLinkResolution: []ReferenceAccess{ + { + RequestPath: "/home", + Reference: &Reference{RealPath: "/home"}, + }, + }, + }, + want: []Reference{ + {RealPath: "/home"}, + {RealPath: "/another/place"}, + }, + }, + { + // /home/wagoodman/file.txt -> /place/wagoodman/file.txt -> /1/file.txt -> /2/real-file.txt + + // this is the current state of the filetree + // . + // ├── 1 + // │ ├── file.txt -> 2/real-file.txt + // │ └── link-to-place -> place + // ├── 2 + // │ └── real-file.txt + // ├── home -> link-to-1/link-to-place + // ├── link-to-1 -> 1 + // └── place + // └── wagoodman + // └── file.txt -> link-to-1/file.txt + + name: "ref with 2 leaf link resolutions", + subject: ReferenceAccessVia{ + ReferenceAccess: ReferenceAccess{ + RequestPath: "/home/wagoodman/file.txt", + Reference: &Reference{RealPath: "/2/real-file.txt"}, + }, + LeafLinkResolution: []ReferenceAccess{ + { + RequestPath: "/place/wagoodman/file.txt", + Reference: &Reference{RealPath: "/place/wagoodman/file.txt"}, + }, + { + RequestPath: "/1/file.txt", + Reference: &Reference{RealPath: "/1/file.txt"}, + }, + }, + }, + want: []Reference{ + {RealPath: "/place/wagoodman/file.txt"}, + {RealPath: "/1/file.txt"}, + {RealPath: "/2/real-file.txt"}, + }, + }, + { + // /home/wagoodman/file.txt -> /place/wagoodman/file.txt -> /1/file.txt -> /2/real-file.txt + + // this is the current state of the filetree + // . 
+ // ├── 1 + // │ ├── file.txt -> 2/real-file.txt + // │ └── link-to-place -> place + // ├── home -> link-to-1/link-to-place + // ├── link-to-1 -> 1 + // └── place + // └── wagoodman + // └── file.txt -> link-to-1/file.txt + + name: "ref with dead link", + subject: ReferenceAccessVia{ + ReferenceAccess: ReferenceAccess{ + RequestPath: "/home/wagoodman/file.txt", + // note: this falls back to the last path that exists which is the behavior for link resolution options: + // []LinkResolutionOption{FollowBasenameLinks, DoNotFollowDeadBasenameLinks} + Reference: &Reference{RealPath: "/1/file.txt"}, + }, + LeafLinkResolution: []ReferenceAccess{ + { + RequestPath: "/place/wagoodman/file.txt", + Reference: &Reference{RealPath: "/place/wagoodman/file.txt"}, + }, + { + RequestPath: "/1/file.txt", + Reference: &Reference{RealPath: "/1/file.txt"}, + }, + { + RequestPath: "/2/real-file.txt", + // nope! it's dead! + //Reference: &file.Reference{RealPath: "/2/real-file.txt"}, + }, + }, + }, + want: []Reference{ + {RealPath: "/place/wagoodman/file.txt"}, + {RealPath: "/1/file.txt"}, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equalf(t, tt.want, tt.subject.ResolutionReferences(), "ResolutionReferences()") + + }) + } +} diff --git a/pkg/filetree/depth_first_path_walker.go b/pkg/filetree/depth_first_path_walker.go index 54c6e48a..586aa07b 100644 --- a/pkg/filetree/depth_first_path_walker.go +++ b/pkg/filetree/depth_first_path_walker.go @@ -55,7 +55,7 @@ func NewDepthFirstPathWalker(tree *FileTree, visitor FileNodeVisitor, conditions } // nolint:gocognit -func (w *DepthFirstPathWalker) Walk(from file.Path) (file.Path, *nodeAccess, error) { +func (w *DepthFirstPathWalker) Walk(from file.Path) (file.Path, *filenode.FileNode, error) { w.pathStack.Push(from) var currentPath file.Path @@ -73,17 +73,17 @@ func (w *DepthFirstPathWalker) Walk(from file.Path) (file.Path, *nodeAccess, err if err != nil { return "", nil, err } - if currentNode == nil 
|| currentNode.FileNode == nil { + if !currentNode.HasFileNode() { return "", nil, fmt.Errorf("nil Node at path=%q", currentPath) } // prevent infinite loop if strings.Count(string(currentPath.Normalize()), file.DirSeparator) >= maxDirDepth { - return currentPath, currentNode, ErrMaxTraversalDepth + return currentPath, currentNode.FileNode, ErrMaxTraversalDepth } if w.conditions.ShouldTerminate != nil && w.conditions.ShouldTerminate(currentPath, *currentNode.FileNode) { - return currentPath, currentNode, nil + return currentPath, currentNode.FileNode, nil } currentPath = currentPath.Normalize() @@ -92,7 +92,7 @@ func (w *DepthFirstPathWalker) Walk(from file.Path) (file.Path, *nodeAccess, err if w.conditions.ShouldVisit == nil || w.conditions.ShouldVisit != nil && w.conditions.ShouldVisit(currentPath, *currentNode.FileNode) { err := w.visitor(currentPath, *currentNode.FileNode) if err != nil { - return currentPath, currentNode, err + return currentPath, currentNode.FileNode, err } w.visitedPaths.Add(currentPath) } @@ -113,7 +113,7 @@ func (w *DepthFirstPathWalker) Walk(from file.Path) (file.Path, *nodeAccess, err } } - return currentPath, currentNode, nil + return currentPath, currentNode.FileNode, nil } func (w *DepthFirstPathWalker) WalkAll() error { diff --git a/pkg/filetree/filetree.go b/pkg/filetree/filetree.go index 93351680..420df97c 100644 --- a/pkg/filetree/filetree.go +++ b/pkg/filetree/filetree.go @@ -20,10 +20,9 @@ var ErrLinkCycleDetected = errors.New("cycle during symlink resolution") // nodeAccess represents a request into the tree for a specific path and the resulting node, which may have a different path. 
type nodeAccess struct { - RequestPath file.Path - FileNode *filenode.FileNode // note: it is important that nodeAccess does not behave like FileNode (then it can be added to the tree directly) - AncestorLinkResolution []nodeAccess - LeafLinkResolution []nodeAccess + RequestPath file.Path + FileNode *filenode.FileNode // note: it is important that nodeAccess does not behave like FileNode (then it can be added to the tree directly) + LeafLinkResolution []nodeAccess } func (na *nodeAccess) HasFileNode() bool { @@ -90,7 +89,7 @@ func (t *FileTree) AllRealPaths() []file.Path { } func (t *FileTree) ListPaths(dir file.Path) ([]file.Path, error) { - n, err := t.node(dir, linkResolutionStrategy{ + fna, err := t.node(dir, linkResolutionStrategy{ FollowAncestorLinks: true, FollowBasenameLinks: true, }) @@ -98,16 +97,16 @@ func (t *FileTree) ListPaths(dir file.Path) ([]file.Path, error) { return nil, err } - if n == nil { + if !fna.HasFileNode() { return nil, nil } - if n.FileNode.FileType != file.TypeDir { + if fna.FileNode.FileType != file.TypeDir { return nil, nil } var listing []file.Path - children := t.tree.Children(n.FileNode) + children := t.tree.Children(fna.FileNode) for _, child := range children { if child == nil { continue @@ -127,7 +126,7 @@ func (t *FileTree) ListPaths(dir file.Path) ([]file.Path, error) { } // File fetches a file.Reference for the given path. Returns nil if the path does not exist in the FileTree. -func (t *FileTree) File(path file.Path, options ...LinkResolutionOption) (bool, *file.ReferenceVia, error) { +func (t *FileTree) File(path file.Path, options ...LinkResolutionOption) (bool, *file.ReferenceAccessVia, error) { userStrategy := newLinkResolutionStrategy(options...) // For: /some/path/here // Where: /some/path -> /other/place @@ -146,24 +145,22 @@ func (t *FileTree) File(path file.Path, options ...LinkResolutionOption) (bool, // // Therefore we can safely lookup the path first without worrying about symlink resolution yet... 
if there is a // hit, return it! If not, fallback to symlink resolution. - //currentNode, err := t.node(path, linkResolutionStrategy{}) - //if err != nil { - // return false, nil, err - //} - //if currentNode.HasFileNode() && (!currentNode.FileNode.IsLink() || currentNode.FileNode.IsLink() && !userStrategy.FollowBasenameLinks) { - // return true, file.NewFileReferenceVia( - // path, - // currentNode.FileNode.Reference, - // newReferenceAccessPath(currentNode.AncestorLinkResolution), - // newReferenceAccessPath(currentNode.LeafLinkResolution), - // ), nil - //} + currentNode, err := t.node(path, linkResolutionStrategy{}) + if err != nil { + return false, nil, err + } + if currentNode.HasFileNode() && (!currentNode.FileNode.IsLink() || currentNode.FileNode.IsLink() && !userStrategy.FollowBasenameLinks) { + return true, file.NewFileReferenceVia( + path, + currentNode.FileNode.Reference, + newReferenceAccessPath(currentNode.LeafLinkResolution), + ), nil + } // symlink resolution!... within the context of container images (which is outside of the responsibility of this object) // the only really valid resolution of symlinks is in squash trees (both for an image and a layer --NOT for trees // that represent a single union FS layer. 
- - currentNode, err := t.node(path, linkResolutionStrategy{ + currentNode, err = t.node(path, linkResolutionStrategy{ FollowAncestorLinks: true, FollowBasenameLinks: userStrategy.FollowBasenameLinks, DoNotFollowDeadBasenameLinks: userStrategy.DoNotFollowDeadBasenameLinks, @@ -172,7 +169,6 @@ func (t *FileTree) File(path file.Path, options ...LinkResolutionOption) (bool, return true, file.NewFileReferenceVia( path, currentNode.FileNode.Reference, - newReferenceAccessPath(currentNode.AncestorLinkResolution), newReferenceAccessPath(currentNode.LeafLinkResolution), ), err } @@ -181,13 +177,19 @@ func (t *FileTree) File(path file.Path, options ...LinkResolutionOption) (bool, func newReferenceAccessPath(nodePath []nodeAccess) []file.ReferenceAccess { var refPath []file.ReferenceAccess - for _, n := range nodePath { - refPath = append(refPath, - file.ReferenceAccess{ - Reference: n.FileNode.Reference, - RequestPath: n.RequestPath, - }, - ) + for i, n := range nodePath { + if i == len(nodePath)-1 && n.FileNode != nil { + // this is already on the parent ReferenceAccessVia object (unless it is a dead link) + break + } + access := file.ReferenceAccess{ + RequestPath: n.RequestPath, + } + if n.FileNode != nil { + access.Reference = n.FileNode.Reference + } + + refPath = append(refPath, access) } return refPath } @@ -198,7 +200,10 @@ func (t *FileTree) node(p file.Path, strategy linkResolutionStrategy) (*nodeAcce if !strategy.FollowLinks() { n := t.tree.Node(nodeID) if n == nil { - return nil, nil + return &nodeAccess{ + RequestPath: normalizedPath, + FileNode: nil, + }, nil } return &nodeAccess{ RequestPath: normalizedPath, @@ -250,7 +255,6 @@ func (t *FileTree) resolveAncestorLinks(path file.Path, attemptedPaths internal. var pathParts = strings.Split(string(path), file.DirSeparator) var currentPathStr string var currentPath file.Path - var nodePath []nodeAccess // iterate through all parts of the path, replacing path elements with link resolutions where possible. 
for idx, part := range pathParts { @@ -275,7 +279,7 @@ func (t *FileTree) resolveAncestorLinks(path file.Path, attemptedPaths internal. // 1. the current path is really invalid and we should return NIL indicating that it cannot be resolved. // 2. the current path is a link? no, this isn't possible since we are iterating through constituent paths // in order, so we are guaranteed to hit parent links in which we should adjust the search path accordingly. - return nil, nil + return currentNodeAccess, nil } // keep track of what we've resolved to so far... @@ -296,7 +300,6 @@ func (t *FileTree) resolveAncestorLinks(path file.Path, attemptedPaths internal. currentNodeAccess, err = t.resolveNodeLinks(currentNodeAccess, true, attemptedPaths) if err != nil { // only expected to happen on cycles - currentNodeAccess.AncestorLinkResolution = append(currentNodeAccess.AncestorLinkResolution, nodePath...) return currentNodeAccess, err } if currentNodeAccess.HasFileNode() { @@ -304,17 +307,15 @@ func (t *FileTree) resolveAncestorLinks(path file.Path, attemptedPaths internal. } currentPathStr = string(currentPath) } - - nodePath = append(nodePath, *currentNodeAccess) } // by this point we have processed all constituent paths; there were no un-added paths and the path is guaranteed // to have followed link resolution. - currentNodeAccess.AncestorLinkResolution = append(currentNodeAccess.AncestorLinkResolution, nodePath...) return currentNodeAccess, nil } // followNode takes the given FileNode and resolves all links at the base of the real path for the node (this implies // that NO ancestors are considered). 
+// nolint: funlen func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, attemptedPaths internal.Set) (*nodeAccess, error) { if n == nil { return nil, fmt.Errorf("cannot resolve links with nil Node given") @@ -328,6 +329,7 @@ func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, // note: this assumes that callers are passing paths in which the constituent parts are NOT symlinks var lastNode *nodeAccess var nodePath []nodeAccess + var nextPath file.Path currentNodeAccess := n @@ -339,6 +341,10 @@ func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, // if there is no next path, return this reference (dead link) if !currentNodeAccess.HasFileNode() { + // the last path we tried to resolve is a dead link, persist the original path as the failed request + if len(nodePath) > 0 { + nodePath[len(nodePath)-1].RequestPath = nextPath + } break } @@ -355,7 +361,6 @@ func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, // prepare for the next iteration alreadySeen.Add(string(currentNodeAccess.FileNode.RealPath)) - var nextPath file.Path if currentNodeAccess.FileNode.LinkPath.IsAbsolutePath() { // use links with absolute paths blindly nextPath = currentNodeAccess.FileNode.LinkPath @@ -382,25 +387,27 @@ func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, // get the next Node (based on the next path) attemptedPaths.Add(string(nextPath)) - ancestorPaths := currentNodeAccess.AncestorLinkResolution currentNodeAccess, err = t.resolveAncestorLinks(nextPath, attemptedPaths) - currentNodeAccess.AncestorLinkResolution = append(ancestorPaths, currentNodeAccess.AncestorLinkResolution...) if err != nil { - currentNodeAccess.LeafLinkResolution = append(currentNodeAccess.LeafLinkResolution, nodePath...) + if currentNodeAccess != nil { + currentNodeAccess.LeafLinkResolution = append(currentNodeAccess.LeafLinkResolution, nodePath...) 
+ } + // only expected to occur upon cycle detection return currentNodeAccess, err } } if !currentNodeAccess.HasFileNode() && !followDeadBasenameLinks { - // TODO: should we trim the node path too? - currentNodeAccess.LeafLinkResolution = append(currentNodeAccess.LeafLinkResolution, nodePath...) - + if lastNode != nil { + lastNode.LeafLinkResolution = append(lastNode.LeafLinkResolution, nodePath...) + } return lastNode, nil } - currentNodeAccess.LeafLinkResolution = append(currentNodeAccess.LeafLinkResolution, nodePath...) - + if currentNodeAccess != nil { + currentNodeAccess.LeafLinkResolution = append(currentNodeAccess.LeafLinkResolution, nodePath...) + } return currentNodeAccess, nil } @@ -644,7 +651,7 @@ func (t *FileTree) setFileNode(fn *filenode.FileNode) error { if err != nil { return err } - if parentNode == nil { + if !parentNode.HasFileNode() { return fmt.Errorf("unable to find parent path=%q while adding path=%q", parentPath, fn.RealPath) } @@ -659,18 +666,18 @@ func (t *FileTree) RemovePath(path file.Path) error { return ErrRemovingRoot } - fn, err := t.node(path, linkResolutionStrategy{ + fna, err := t.node(path, linkResolutionStrategy{ FollowAncestorLinks: true, FollowBasenameLinks: false, }) if err != nil { return err } - if fn == nil { + if !fna.HasFileNode() { return nil } - _, err = t.tree.RemoveNode(fn.FileNode) + _, err = t.tree.RemoveNode(fna.FileNode) if err != nil { return err } @@ -681,18 +688,18 @@ func (t *FileTree) RemovePath(path file.Path) error { // basename is a symlink, then the symlink is followed before resolving children. If the path does not exist, this is a // nop. func (t *FileTree) RemoveChildPaths(path file.Path) error { - fn, err := t.node(path, linkResolutionStrategy{ + fna, err := t.node(path, linkResolutionStrategy{ FollowAncestorLinks: true, FollowBasenameLinks: true, }) if err != nil { return err } - if fn == nil { + if !fna.HasFileNode() { // can't remove child paths for Node that doesn't exist! 
return nil } - for _, child := range t.tree.Children(fn.FileNode) { + for _, child := range t.tree.Children(fna.FileNode) { _, err := t.tree.RemoveNode(child) if err != nil { return err @@ -809,7 +816,7 @@ func (t *FileTree) merge(upper *FileTree) error { if err != nil { return fmt.Errorf("filetree merge failed when looking for path=%q : %w", upperNode.RealPath, err) } - if lowerNode == nil { + if !lowerNode.HasFileNode() { // there is no existing Node... add parents and prepare to set if err := t.addParentPaths(upperNode.RealPath); err != nil { return fmt.Errorf("could not add parent paths to lower: %w", err) @@ -819,11 +826,11 @@ func (t *FileTree) merge(upper *FileTree) error { nodeCopy := *upperNode // keep original file references if the upper tree does not have them (only for the same file types) - if lowerNode != nil && lowerNode.FileNode.Reference != nil && upperNode.Reference == nil && upperNode.FileType == lowerNode.FileNode.FileType { + if lowerNode.HasFileNode() && lowerNode.FileNode.Reference != nil && upperNode.Reference == nil && upperNode.FileType == lowerNode.FileNode.FileType { nodeCopy.Reference = lowerNode.FileNode.Reference } - if lowerNode != nil && upperNode.FileType != file.TypeDir && lowerNode.FileNode.FileType == file.TypeDir { + if lowerNode.HasFileNode() && upperNode.FileType != file.TypeDir && lowerNode.FileNode.FileType == file.TypeDir { // NOTE: both upperNode and lowerNode paths are the same, and does not have an effect // on removal of child paths err := t.RemoveChildPaths(upperNode.RealPath) diff --git a/pkg/filetree/filetree_test.go b/pkg/filetree/filetree_test.go index db1d736a..3c05d41f 100644 --- a/pkg/filetree/filetree_test.go +++ b/pkg/filetree/filetree_test.go @@ -1,13 +1,11 @@ package filetree import ( - "encoding/json" "errors" "fmt" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" "github.com/stretchr/testify/require" - "os" "testing" "github.com/anchore/stereoscope/internal" @@ -559,14 +557,129 @@ 
func TestFileTree_File_MultiSymlink(t *testing.T) { // - place/wagoodman // - place/wagoodman/file.txt -> link-to-1/file.txt -> 1/file.txt -> 2/real-file.txt + expected := &file.ReferenceAccessVia{ + ReferenceAccess: file.ReferenceAccess{ + RequestPath: "/home/wagoodman/file.txt", + Reference: &file.Reference{RealPath: "/2/real-file.txt"}, + }, + LeafLinkResolution: []file.ReferenceAccess{ + { + RequestPath: "/place/wagoodman/file.txt", + Reference: &file.Reference{RealPath: "/place/wagoodman/file.txt"}, + }, + { + RequestPath: "/1/file.txt", + Reference: &file.Reference{RealPath: "/1/file.txt"}, + }, + }, + } + requestPath := "/home/wagoodman/file.txt" linkOptions := []LinkResolutionOption{FollowBasenameLinks} _, ref, err := tr.File(file.Path(requestPath), linkOptions...) require.NoError(t, err) - enc := json.NewEncoder(os.Stdout) - enc.SetIndent("", " ") - require.NoError(t, enc.Encode(ref)) - //t.Fatal("nope") + + // compare the remaining expectations, ignoring any reference IDs + ignoreIDs := cmpopts.IgnoreUnexported(file.Reference{}) + if d := cmp.Diff(expected, ref, ignoreIDs); d != "" { + t.Errorf("unexpected file reference (-want +got):\n%s", d) + } + +} + +func TestFileTree_File_MultiSymlink_deadlink(t *testing.T) { + var err error + tr := NewFileTree() + + _, err = tr.AddSymLink("/home", "/link-to-1/link-to-place") + require.NoError(t, err) + + _, err = tr.AddSymLink("/link-to-1", "/1") + require.NoError(t, err) + + _, err = tr.AddDir("/1") + require.NoError(t, err) + + // causes the dead link + //_, err = tr.AddFile("/2/real-file.txt") + //require.NoError(t, err) + + _, err = tr.AddSymLink("/1/file.txt", "/2/real-file.txt") + require.NoError(t, err) + + _, err = tr.AddSymLink("/1/link-to-place", "/place") + require.NoError(t, err) + + _, err = tr.AddSymLink("/place/wagoodman/file.txt", "/link-to-1/file.txt") + require.NoError(t, err) + + // this is the current state of the filetree + // . 
+ // ├── 1 + // │ ├── file.txt -> 2/real-file.txt + // │ └── link-to-place -> place + // ├── home -> link-to-1/link-to-place + // ├── link-to-1 -> 1 + // └── place + // └── wagoodman + // └── file.txt -> link-to-1/file.txt + + // request: /home/wagoodman/file.txt + // reference: /2/real-file.txt + // ancestor resolution: + // - /home -> /link-to-1/link-to-place + // - /link-to-1 -> /1 + // - /1/link-to-place -> /place + // leaf resolution: + // - /place/wagoodman/file.txt -> /link-to-1/file.txt + // - /link-to-1 -> /1 + // - /1/file.txt -> /2/real-file.txt + // path: + // - home -> link-to-1/link-to-place -> place + // - place/wagoodman + // - place/wagoodman/file.txt -> link-to-1/file.txt -> 1/file.txt -> 2/real-file.txt + + expected := &file.ReferenceAccessVia{ + ReferenceAccess: file.ReferenceAccess{ + RequestPath: "/home/wagoodman/file.txt", + Reference: &file.Reference{RealPath: "/1/file.txt"}, + }, + LeafLinkResolution: []file.ReferenceAccess{ + { + RequestPath: "/place/wagoodman/file.txt", + Reference: &file.Reference{RealPath: "/place/wagoodman/file.txt"}, + }, + { + RequestPath: "/1/file.txt", + Reference: &file.Reference{RealPath: "/1/file.txt"}, + }, + { + RequestPath: "/2/real-file.txt", + //Reference: &file.Reference{RealPath: "/2/real-file.txt"}, + }, + }, + } + + requestPath := "/home/wagoodman/file.txt" + + { + linkOptions := []LinkResolutionOption{FollowBasenameLinks} + _, ref, err := tr.File(file.Path(requestPath), linkOptions...) + require.Nil(t, ref) + require.NoError(t, err) + } + + { + linkOptions := []LinkResolutionOption{FollowBasenameLinks, DoNotFollowDeadBasenameLinks} + _, ref, err := tr.File(file.Path(requestPath), linkOptions...) 
+ require.NoError(t, err) + + // compare the remaining expectations, ignoring any reference IDs + ignoreIDs := cmpopts.IgnoreUnexported(file.Reference{}) + if d := cmp.Diff(expected, ref, ignoreIDs); d != "" { + t.Errorf("unexpected file reference (-want +got):\n%s", d) + } + } } @@ -582,230 +695,262 @@ func TestFileTree_File_Symlink(t *testing.T) { expectedExists bool // if the request path should exist or not expectedErr bool // if an error is expected from the request expectedRealRef bool // if the resolved reference should match the built reference from "buildRealPath" - expected *file.ReferenceVia + expected *file.ReferenceAccessVia }{ - /////////////// - //{ - // name: "request base is ABSOLUTE symlink", - // buildLinkSource: "/home", - // buildLinkDest: "/another/place", - // buildRealPath: "/another/place", - // linkOptions: []LinkResolutionOption{FollowBasenameLinks}, - // requestPath: "/home", - // // /another/place is the "real" reference that we followed, so we should expect the IDs to match upon lookup - // expectedRealRef: true, - // expectedExists: true, - // expected: &file.ReferenceVia{ - // Reference: &file.Reference{RealPath: "/another/place"}, - // RequestPath: "/home", - // LeafResolution: []file.ReferenceAccess{ - // { - // RequestPath: "/home", - // Reference: &file.Reference{RealPath: "/home"}, - // }, - // { - // RequestPath: "/another/place", - // Reference: &file.Reference{RealPath: "/another/place"}, - // }, - // }, - // }, - //}, - //{ - // name: "request base is ABSOLUTE symlink, request no link resolution", - // buildLinkSource: "/home", - // buildLinkDest: "/another/place", - // buildRealPath: "/another/place", - // linkOptions: []LinkResolutionOption{}, - // requestPath: "/home", - // // /home is just a symlink, not the real file (which is at /another/place)... 
and we've provided no symlink resolution - // expectedRealRef: false, - // expectedExists: true, - // expected: &file.ReferenceVia{ - // Reference: &file.Reference{RealPath: "/home"}, // this is the real symlink - // RequestPath: "/home", - // LeafResolution: nil, - // }, - //}, - // + /////////////////// + { + name: "request base is ABSOLUTE symlink", + buildLinkSource: "/home", + buildLinkDest: "/another/place", + buildRealPath: "/another/place", + linkOptions: []LinkResolutionOption{FollowBasenameLinks}, + requestPath: "/home", + // /another/place is the "real" reference that we followed, so we should expect the IDs to match upon lookup + expectedRealRef: true, + expectedExists: true, + expected: &file.ReferenceAccessVia{ + ReferenceAccess: file.ReferenceAccess{ + RequestPath: "/home", + Reference: &file.Reference{RealPath: "/another/place"}, + }, + LeafLinkResolution: []file.ReferenceAccess{ + { + RequestPath: "/home", + Reference: &file.Reference{RealPath: "/home"}, + }, + }, + }, + }, + { + name: "request base is ABSOLUTE symlink, request no link resolution", + buildLinkSource: "/home", + buildLinkDest: "/another/place", + buildRealPath: "/another/place", + linkOptions: []LinkResolutionOption{}, + requestPath: "/home", + // /home is just a symlink, not the real file (which is at /another/place)... 
and we've provided no symlink resolution + expectedRealRef: false, + expectedExists: true, + expected: &file.ReferenceAccessVia{ + ReferenceAccess: file.ReferenceAccess{ + RequestPath: "/home", + Reference: &file.Reference{RealPath: "/home"}, + }, + LeafLinkResolution: nil, + }, + }, + ///////////////////// - //{ - // name: "request parent is ABSOLUTE symlink", - // buildLinkSource: "/home", - // buildLinkDest: "/another/place", - // buildRealPath: "/another/place/wagoodman", - // linkOptions: []LinkResolutionOption{FollowBasenameLinks}, // a nop for this case (note the expected path and ref) - // requestPath: "/home/wagoodman", - // expectedExists: true, - // expectedRealRef: true, - // expected: &file.ReferenceVia{ - // Reference: &file.Reference{RealPath: "/another/place/wagoodman"}, - // RequestPath: "/home/wagoodman", - // LinkResolution: file.LinkResolution{ - // AncestorResolution: []file.ReferenceAccess{ - // { - // RequestPath: "/home", - // Reference: &file.Reference{RealPath: "/home"}, - // }, - // { - // RequestPath: "/another/place", - // Reference: nil, - // }, - // }, - // LeafResolution: []file.ReferenceAccess{ - // { - // RequestPath: "/another/place/wagoodman", - // Reference: &file.Reference{RealPath: "/another/place/wagoodman"}, - // LinkResolution: file.LinkResolution{ - // AncestorResolution: []file.ReferenceAccess{ - // { - // RequestPath: "/home", - // Reference: &file.Reference{RealPath: "/home"}, // note: this was explicitly added in the tree - // }, - // { - // RequestPath: "/another/place", - // Reference: nil, - // }, - // }, - // }, - // }, - // }, - // }, - // }, - //}, - //{ - // name: "request parent is ABSOLUTE symlink", - // buildLinkSource: "/home", - // buildLinkDest: "/another/place", - // buildRealPath: "/another/place/wagoodman", - // linkOptions: []LinkResolutionOption{}, // a nop for this case (note the expected path and ref) - // requestPath: "/home/wagoodman", - // expectedExists: true, - // expectedResolvedPath: 
"/another/place/wagoodman", - // expectedRealRef: true, - // expectedAccessRequestPaths: []string{}, - // expectedAccessRealPaths: []string{}, - //}, - // - ///////////////// - //{ - // name: "request base is RELATIVE symlink", - // buildLinkSource: "/home", - // buildLinkDest: "../../another/place", - // buildRealPath: "/another/place", - // linkOptions: []LinkResolutionOption{FollowBasenameLinks}, - // requestPath: "/home", - // expectedExists: true, - // expectedResolvedPath: "/another/place", - // expectedRealRef: true, - // expectedAccessRequestPaths: []string{}, - // expectedAccessRealPaths: []string{}, - //}, - //{ - // name: "request base is RELATIVE symlink", - // buildLinkSource: "/home", - // buildLinkDest: "../../another/place/wagoodman", - // buildRealPath: "/another/place/wagoodman", - // linkOptions: []LinkResolutionOption{}, - // requestPath: "/home", - // expectedExists: true, - // // note that since the request matches the link source and we are NOT following, we get the link ref back - // expectedResolvedPath: "/home", - // expectedRealRef: false, - // expectedAccessRequestPaths: []string{}, - // expectedAccessRealPaths: []string{}, - //}, + { + name: "request parent is ABSOLUTE symlink", + buildLinkSource: "/home", + buildLinkDest: "/another/place", + buildRealPath: "/another/place/wagoodman", + linkOptions: []LinkResolutionOption{FollowBasenameLinks}, // a nop for this case (note the expected path and ref) + requestPath: "/home/wagoodman", + expectedExists: true, + expectedRealRef: true, + expected: &file.ReferenceAccessVia{ + ReferenceAccess: file.ReferenceAccess{ + RequestPath: "/home/wagoodman", + Reference: &file.Reference{RealPath: "/another/place/wagoodman"}, + }, + // note: the request is on the leaf, which is within a symlink, but is not a symlink itself. 
+ // this means that all resolution is on the ancestors (thus not a link resolution on the leaf) + LeafLinkResolution: nil, + }, + }, + { + name: "request parent is ABSOLUTE symlink, request no link resolution", + buildLinkSource: "/home", + buildLinkDest: "/another/place", + buildRealPath: "/another/place/wagoodman", + linkOptions: []LinkResolutionOption{}, // a nop for this case (note the expected path and ref) + requestPath: "/home/wagoodman", + expectedExists: true, + expectedRealRef: true, + // why are we seeing a result that requires link resolution but we've requested no link resolution? + // because there is always ancestor link resolution by default, and this example is only via + // ancestors, thus the leaf is still resolved (since it doesn't have a link). + expected: &file.ReferenceAccessVia{ + ReferenceAccess: file.ReferenceAccess{ + RequestPath: "/home/wagoodman", + Reference: &file.Reference{RealPath: "/another/place/wagoodman"}, + }, + // note: the request is on the leaf, which is within a symlink, but is not a symlink itself. 
+ // this means that all resolution is on the ancestors (thus not a link resolution on the leaf) + LeafLinkResolution: nil, + }, + }, + ///////////////// - //{ - // name: "request parent is RELATIVE symlink", - // buildLinkSource: "/home", - // buildLinkDest: "../../another/place", - // buildRealPath: "/another/place/wagoodman", - // linkOptions: []LinkResolutionOption{FollowBasenameLinks}, // this is a nop since the parent is a link - // requestPath: "/home/wagoodman", - // expectedExists: true, - // expectedResolvedPath: "/another/place/wagoodman", - // expectedRealRef: true, - // expectedAccessRequestPaths: []string{}, - // expectedAccessRealPaths: []string{}, - //}, - //{ - // name: "request parent is RELATIVE symlink", - // buildLinkSource: "/home", - // buildLinkDest: "../../another/place", - // buildRealPath: "/another/place/wagoodman", - // linkOptions: []LinkResolutionOption{}, // this is a nop since the parent is a link - // requestPath: "/home/wagoodman", - // expectedExists: true, - // expectedResolvedPath: "/another/place/wagoodman", - // expectedRealRef: true, - // expectedAccessRequestPaths: []string{}, - // expectedAccessRealPaths: []string{}, - //}, + { + name: "request base is RELATIVE symlink", + buildLinkSource: "/home", + buildLinkDest: "../../another/place", + buildRealPath: "/another/place", + linkOptions: []LinkResolutionOption{FollowBasenameLinks}, + requestPath: "/home", + expectedExists: true, + expectedRealRef: true, + expected: &file.ReferenceAccessVia{ + ReferenceAccess: file.ReferenceAccess{ + RequestPath: "/home", + Reference: &file.Reference{RealPath: "/another/place"}, + }, + LeafLinkResolution: []file.ReferenceAccess{ + { + RequestPath: "/home", + Reference: &file.Reference{RealPath: "/home"}, + }, + }, + }, + }, + { + name: "request base is RELATIVE symlink, no link resolution requested", + buildLinkSource: "/home", + buildLinkDest: "../../another/place/wagoodman", + buildRealPath: "/another/place/wagoodman", + linkOptions: 
[]LinkResolutionOption{}, + requestPath: "/home", + expectedExists: true, + // note that since the request matches the link source and we are NOT following, we get the link ref back + expectedRealRef: false, + expected: &file.ReferenceAccessVia{ + ReferenceAccess: file.ReferenceAccess{ + RequestPath: "/home", + Reference: &file.Reference{RealPath: "/home"}, + }, + LeafLinkResolution: nil, + }, + }, ///////////////// - //{ - // name: "request base is DEAD symlink", - // buildLinkSource: "/home", - // buildLinkDest: "/mwahaha/i/go/to/nowhere", - // linkOptions: []LinkResolutionOption{}, - // requestPath: "/home", - // // since we did not follow, the paths should exist to the symlink file - // expectedResolvedPath: "/home", - // expectedExists: true, - // expectedAccessRequestPaths: []string{}, - // expectedAccessRealPaths: []string{}, - //}, - //{ - // name: "request base is DEAD symlink", - // buildLinkSource: "/home", - // buildLinkDest: "/mwahaha/i/go/to/nowhere", - // linkOptions: []LinkResolutionOption{FollowBasenameLinks}, - // requestPath: "/home", - // // we are following the path, which goes to nowhere.... the first failed path is resolved and returned - // expectedResolvedPath: "/mwahaha", - // expectedExists: false, - // expectedAccessRequestPaths: []string{}, - // expectedAccessRealPaths: []string{}, - //}, - //{ - // name: "request base is DEAD symlink (which we don't follow)", - // buildLinkSource: "/home", - // buildLinkDest: "/mwahaha/i/go/to/nowhere", - // linkOptions: []LinkResolutionOption{FollowBasenameLinks, DoNotFollowDeadBasenameLinks}, - // requestPath: "/home", - // // we are following the path, which goes to nowhere.... 
the first failed path is resolved and returned - // expectedResolvedPath: "/home", - // expectedExists: true, - // expectedAccessRequestPaths: []string{}, - // expectedAccessRealPaths: []string{}, - //}, + { + name: "request parent is RELATIVE symlink", + buildLinkSource: "/home", + buildLinkDest: "../../another/place", + buildRealPath: "/another/place/wagoodman", + linkOptions: []LinkResolutionOption{FollowBasenameLinks}, // this is a nop since the parent is a link + requestPath: "/home/wagoodman", + expectedExists: true, + expectedRealRef: true, + expected: &file.ReferenceAccessVia{ + ReferenceAccess: file.ReferenceAccess{ + RequestPath: "/home/wagoodman", + Reference: &file.Reference{RealPath: "/another/place/wagoodman"}, + }, + // note: the request is on the leaf, which is within a symlink, but is not a symlink itself. + // (the symlink is for an ancestor... so we don't show link resolutions) + LeafLinkResolution: nil, + }, + }, + { + name: "request parent is RELATIVE symlink, no link resolution requested", + buildLinkSource: "/home", + buildLinkDest: "../../another/place", + buildRealPath: "/another/place/wagoodman", + linkOptions: []LinkResolutionOption{}, // this is a nop since the parent is a link + requestPath: "/home/wagoodman", + expectedExists: true, + expectedRealRef: true, + expected: &file.ReferenceAccessVia{ + ReferenceAccess: file.ReferenceAccess{ + RequestPath: "/home/wagoodman", + Reference: &file.Reference{RealPath: "/another/place/wagoodman"}, + }, + // note: the request is on the leaf, which is within a symlink, but is not a symlink itself. + // (the symlink is for an ancestor... 
so we don't show link resolutions) + LeafLinkResolution: nil, + }, + }, + /////////////// + { + name: "request base is DEAD symlink, request no link resolution", + buildLinkSource: "/home", + buildLinkDest: "/mwahaha/i/go/to/nowhere", + linkOptions: []LinkResolutionOption{}, + requestPath: "/home", + // since we did not follow, the paths should exist to the symlink file + expectedExists: true, + expected: &file.ReferenceAccessVia{ + ReferenceAccess: file.ReferenceAccess{ + RequestPath: "/home", + Reference: &file.Reference{RealPath: "/home"}, + }, + LeafLinkResolution: nil, + }, + }, + { + name: "request base is DEAD symlink", + buildLinkSource: "/home", + buildLinkDest: "/mwahaha/i/go/to/nowhere", + linkOptions: []LinkResolutionOption{FollowBasenameLinks}, + requestPath: "/home", + // we are following the path, which goes to nowhere.... the first failed path is resolved and returned + expectedExists: false, + expected: nil, + }, + { + name: "request base is DEAD symlink (which we don't follow)", + buildLinkSource: "/home", + buildLinkDest: "/mwahaha/i/go/to/nowhere", + linkOptions: []LinkResolutionOption{FollowBasenameLinks, DoNotFollowDeadBasenameLinks}, + requestPath: "/home", + // we are following the path, which goes to nowhere.... 
the first failed path is resolved and returned + expectedExists: true, + expected: &file.ReferenceAccessVia{ + ReferenceAccess: file.ReferenceAccess{ + RequestPath: "/home", + Reference: &file.Reference{RealPath: "/home"}, + }, + LeafLinkResolution: []file.ReferenceAccess{ + { + RequestPath: "/home", + Reference: &file.Reference{RealPath: "/home"}, + }, + // this entry represents the dead symlink, note there is no file reference to fetch from the catalog + { + RequestPath: "/mwahaha/i/go/to/nowhere", + }, + }, + }, + }, ///////////////// - //// trying to resolve to above root - //{ - // name: "request parent is RELATIVE symlink to ABOVE root", - // buildLinkSource: "/home", - // buildLinkDest: "../../../../../../../../../../../../another/place", - // buildRealPath: "/another/place/wagoodman", - // linkOptions: []LinkResolutionOption{FollowBasenameLinks}, // this is a nop since the parent is a link - // requestPath: "/home/wagoodman", - // expectedExists: true, - // expectedResolvedPath: "/another/place/wagoodman", - // expectedRealRef: true, - // expectedAccessRequestPaths: []string{}, - // expectedAccessRealPaths: []string{}, - //}, - //{ - // name: "request parent is RELATIVE symlink to ABOVE root", - // buildLinkSource: "/home", - // buildLinkDest: "../../../../../../../../../../../../another/place", - // buildRealPath: "/another/place/wagoodman", - // linkOptions: []LinkResolutionOption{}, // this is a nop since the parent is a link - // requestPath: "/home/wagoodman", - // expectedExists: true, - // expectedResolvedPath: "/another/place/wagoodman", - // expectedRealRef: true, - // expectedAccessRequestPaths: []string{}, - // expectedAccessRealPaths: []string{}, - //}, + // trying to resolve to above root + { + name: "request parent is RELATIVE symlink to ABOVE root", + buildLinkSource: "/home", + buildLinkDest: "../../../../../../../../../../../../another/place", + buildRealPath: "/another/place/wagoodman", + linkOptions: 
[]LinkResolutionOption{FollowBasenameLinks}, // this is a nop since the parent is a link + requestPath: "/home/wagoodman", + expectedExists: true, + expectedRealRef: true, + expected: &file.ReferenceAccessVia{ + ReferenceAccess: file.ReferenceAccess{ + RequestPath: "/home/wagoodman", + Reference: &file.Reference{RealPath: "/another/place/wagoodman"}, + }, + LeafLinkResolution: nil, + }, + }, + { + name: "request parent is RELATIVE symlink to ABOVE root", + buildLinkSource: "/home", + buildLinkDest: "../../../../../../../../../../../../another/place", + buildRealPath: "/another/place/wagoodman", + linkOptions: []LinkResolutionOption{}, // this is a nop since the parent is a link + requestPath: "/home/wagoodman", + expectedExists: true, + expectedRealRef: true, + expected: &file.ReferenceAccessVia{ + ReferenceAccess: file.ReferenceAccess{ + RequestPath: "/home/wagoodman", + Reference: &file.Reference{RealPath: "/another/place/wagoodman"}, + }, + LeafLinkResolution: nil, + }, + }, } for _, test := range tests { @@ -841,9 +986,9 @@ func TestFileTree_File_Symlink(t *testing.T) { } // validate the resolved reference against the real reference added to the tree - if ref.ID() == realRef.ID() && !test.expectedRealRef { + if !test.expectedRealRef && ref.HasReference() && realRef != nil && ref.ID() == realRef.ID() { t.Errorf("refs should not be the same: resolve(%+v) == reaal(%+v)", ref, realRef) - } else if ref.ID() != realRef.ID() && test.expectedRealRef { + } else if test.expectedRealRef && ref.ID() != realRef.ID() { t.Errorf("refs should be the same: resolve(%+v) != real(%+v)", ref, realRef) } diff --git a/pkg/image/content_helpers.go b/pkg/image/content_helpers.go index b40f406f..c91b044b 100644 --- a/pkg/image/content_helpers.go +++ b/pkg/image/content_helpers.go @@ -28,7 +28,7 @@ func fetchFileContentsByPath(ft *filetree.FileTree, fileCatalog *FileCatalog, pa // fetchFileContentsByPath is a common helper function for resolving file references for a MIME type from the 
file // catalog relative to the given tree. -func fetchFilesByMIMEType(ft *filetree.FileTree, fileCatalog *FileCatalog, mType string) ([]file.ReferenceVia, error) { +func fetchFilesByMIMEType(ft *filetree.FileTree, fileCatalog *FileCatalog, mType string) ([]file.ReferenceAccessVia, error) { fileEntries, err := fileCatalog.GetByMIMEType(mType) if err != nil { return nil, fmt.Errorf("unable to fetch file references by MIME type (%q): %w", mType, err) @@ -40,7 +40,7 @@ func fetchFilesByMIMEType(ft *filetree.FileTree, fileCatalog *FileCatalog, mType // fetchFilesByExtension is a common helper function for resolving file references for a file extension from the file // catalog relative to the given tree. -func fetchFilesByExtension(ft *filetree.FileTree, fileCatalog *FileCatalog, extension string) ([]file.ReferenceVia, error) { +func fetchFilesByExtension(ft *filetree.FileTree, fileCatalog *FileCatalog, extension string) ([]file.ReferenceAccessVia, error) { fileEntries, err := fileCatalog.GetByExtension(extension) if err != nil { return nil, fmt.Errorf("unable to fetch file references by extension (%q): %w", extension, err) @@ -51,7 +51,7 @@ func fetchFilesByExtension(ft *filetree.FileTree, fileCatalog *FileCatalog, exte // fetchFilesByBasename is a common helper function for resolving file references for a file basename // catalog relative to the given tree. 
-func fetchFilesByBasename(ft *filetree.FileTree, fileCatalog *FileCatalog, basename string) ([]file.ReferenceVia, error) { +func fetchFilesByBasename(ft *filetree.FileTree, fileCatalog *FileCatalog, basename string) ([]file.ReferenceAccessVia, error) { fileEntries, err := fileCatalog.GetByBasename(basename) if err != nil { return nil, fmt.Errorf("unable to fetch file references by basename (%q): %w", basename, err) @@ -62,7 +62,7 @@ func fetchFilesByBasename(ft *filetree.FileTree, fileCatalog *FileCatalog, basen // fetchFilesByBasenameGlob is a common helper function for resolving file references for a file basename glob pattern // catalog relative to the given tree. -func fetchFilesByBasenameGlob(ft *filetree.FileTree, fileCatalog *FileCatalog, basenameGlobs ...string) ([]file.ReferenceVia, error) { +func fetchFilesByBasenameGlob(ft *filetree.FileTree, fileCatalog *FileCatalog, basenameGlobs ...string) ([]file.ReferenceAccessVia, error) { fileEntries, err := fileCatalog.GetByBasenameGlob(basenameGlobs...) if err != nil { return nil, fmt.Errorf("unable to fetch file references by basename glob (%q): %w", basenameGlobs, err) @@ -71,8 +71,8 @@ func fetchFilesByBasenameGlob(ft *filetree.FileTree, fileCatalog *FileCatalog, b return filterCatalogFilesRelativesToTree(ft, fileEntries, filetree.FollowBasenameLinks) } -func filterCatalogFilesRelativesToTree(ft *filetree.FileTree, fileEntries []FileCatalogEntry, linkResolutionOpts ...filetree.LinkResolutionOption) ([]file.ReferenceVia, error) { - var refs []file.ReferenceVia +func filterCatalogFilesRelativesToTree(ft *filetree.FileTree, fileEntries []FileCatalogEntry, linkResolutionOpts ...filetree.LinkResolutionOption) ([]file.ReferenceAccessVia, error) { + var refs []file.ReferenceAccessVia allFileEntries: for _, entry := range fileEntries { _, ref, err := ft.File(entry.File.RealPath, linkResolutionOpts...) 
@@ -80,11 +80,13 @@ allFileEntries: return nil, fmt.Errorf("unable to get ref for path=%q: %w", entry.File.RealPath, err) } + // TODO: alex think if this is correct + // if !ref.HasReference() { if ref == nil { continue } - for _, accessRef := range ref.AccessReferences() { + for _, accessRef := range ref.ResolutionReferences() { if accessRef.ID() == entry.File.ID() { // we know this entry exists in the tree, keep track of the reference for this file refs = append(refs, *ref) diff --git a/pkg/image/file_catalog.go b/pkg/image/file_catalog.go index dd8a5ed3..cc207acc 100644 --- a/pkg/image/file_catalog.go +++ b/pkg/image/file_catalog.go @@ -2,13 +2,14 @@ package image import ( "fmt" - "github.com/scylladb/go-set/strset" "io" "path" "sort" "strings" "sync" + "github.com/scylladb/go-set/strset" + "github.com/becheran/wildmatch-go" "github.com/anchore/stereoscope/pkg/file" @@ -63,10 +64,10 @@ func (c *FileCatalog) Add(f file.Reference, m file.Metadata, l *Layer, opener fi c.byBasename[basename] = append(c.byBasename[basename], id) c.basenames.Add(basename) - //fmt.Println("Adding file to catalog: ", f.RealPath, " (", id, ")") + // fmt.Println("Adding file to catalog: ", f.RealPath, " (", id, ")") for _, ext := range fileExtensions(string(f.RealPath)) { c.byExtension[ext] = append(c.byExtension[ext], id) - //fmt.Println(" Extensions ("+ext+"): ", c.byExtension[ext]) + // fmt.Println(" Extensions ("+ext+"): ", c.byExtension[ext]) } c.catalog[id] = FileCatalogEntry{ diff --git a/pkg/image/image.go b/pkg/image/image.go index 031f72c0..4711c4a9 100644 --- a/pkg/image/image.go +++ b/pkg/image/image.go @@ -268,8 +268,8 @@ func (i *Image) FileContentsFromSquash(path file.Path) (io.ReadCloser, error) { } // FilesByMIMETypeFromSquash returns file references for files that match at least one of the given MIME types. 
-func (i *Image) FilesByMIMETypeFromSquash(mimeTypes ...string) ([]file.ReferenceVia, error) { - var refs []file.ReferenceVia +func (i *Image) FilesByMIMETypeFromSquash(mimeTypes ...string) ([]file.ReferenceAccessVia, error) { + var refs []file.ReferenceAccessVia for _, ty := range mimeTypes { refsForType, err := fetchFilesByMIMEType(i.SquashedTree(), &i.FileCatalog, ty) if err != nil { @@ -281,17 +281,17 @@ func (i *Image) FilesByMIMETypeFromSquash(mimeTypes ...string) ([]file.Reference } // FilesByExtensionFromSquash returns file references for files that have the given extension relative to the squash tree. -func (i *Image) FilesByExtensionFromSquash(extension string) ([]file.ReferenceVia, error) { +func (i *Image) FilesByExtensionFromSquash(extension string) ([]file.ReferenceAccessVia, error) { return fetchFilesByExtension(i.SquashedTree(), &i.FileCatalog, extension) } // FilesByBasenameFromSquash returns file references for files with the given basename relative to the squash tree. -func (i *Image) FilesByBasenameFromSquash(basename string) ([]file.ReferenceVia, error) { +func (i *Image) FilesByBasenameFromSquash(basename string) ([]file.ReferenceAccessVia, error) { return fetchFilesByBasename(i.SquashedTree(), &i.FileCatalog, basename) } // FilesByBasenameGlobFromSquash returns file references for files with the given basename glob pattern relative to the squash tree. -func (i *Image) FilesByBasenameGlobFromSquash(globs ...string) ([]file.ReferenceVia, error) { +func (i *Image) FilesByBasenameGlobFromSquash(globs ...string) ([]file.ReferenceAccessVia, error) { return fetchFilesByBasenameGlob(i.SquashedTree(), &i.FileCatalog, globs...) } @@ -304,7 +304,7 @@ func (i *Image) FileContentsByRef(ref file.Reference) (io.ReadCloser, error) { // ResolveLinkByLayerSquash resolves a symlink or hardlink for the given file reference relative to the result from // the layer squash of the given layer index argument. 
// If the given file reference is not a link type, or is a unresolvable (dead) link, then the given file reference is returned. -func (i *Image) ResolveLinkByLayerSquash(ref file.Reference, layer int, options ...filetree.LinkResolutionOption) (*file.ReferenceVia, error) { +func (i *Image) ResolveLinkByLayerSquash(ref file.Reference, layer int, options ...filetree.LinkResolutionOption) (*file.ReferenceAccessVia, error) { allOptions := append([]filetree.LinkResolutionOption{filetree.FollowBasenameLinks}, options...) _, resolvedRef, err := i.Layers[layer].SquashedTree.File(ref.RealPath, allOptions...) return resolvedRef, err @@ -312,7 +312,7 @@ func (i *Image) ResolveLinkByLayerSquash(ref file.Reference, layer int, options // ResolveLinkByImageSquash resolves a symlink or hardlink for the given file reference relative to the result from the image squash. // If the given file reference is not a link type, or is a unresolvable (dead) link, then the given file reference is returned. -func (i *Image) ResolveLinkByImageSquash(ref file.Reference, options ...filetree.LinkResolutionOption) (*file.ReferenceVia, error) { +func (i *Image) ResolveLinkByImageSquash(ref file.Reference, options ...filetree.LinkResolutionOption) (*file.ReferenceAccessVia, error) { allOptions := append([]filetree.LinkResolutionOption{filetree.FollowBasenameLinks}, options...) _, resolvedRef, err := i.Layers[len(i.Layers)-1].SquashedTree.File(ref.RealPath, allOptions...) return resolvedRef, err diff --git a/pkg/image/layer.go b/pkg/image/layer.go index 815a2386..5d7c6789 100644 --- a/pkg/image/layer.go +++ b/pkg/image/layer.go @@ -155,8 +155,8 @@ func (l *Layer) FileContentsFromSquash(path file.Path) (io.ReadCloser, error) { } // FilesByMIMEType returns file references for files that match at least one of the given MIME types relative to each layer tree. 
-func (l *Layer) FilesByMIMEType(mimeTypes ...string) ([]file.ReferenceVia, error) { - var refs []file.ReferenceVia +func (l *Layer) FilesByMIMEType(mimeTypes ...string) ([]file.ReferenceAccessVia, error) { + var refs []file.ReferenceAccessVia for _, ty := range mimeTypes { refsForType, err := fetchFilesByMIMEType(l.Tree, l.fileCatalog, ty) if err != nil { @@ -168,8 +168,8 @@ func (l *Layer) FilesByMIMEType(mimeTypes ...string) ([]file.ReferenceVia, error } // FilesByMIMETypeFromSquash returns file references for files that match at least one of the given MIME types relative to the squashed file tree representation. -func (l *Layer) FilesByMIMETypeFromSquash(mimeTypes ...string) ([]file.ReferenceVia, error) { - var refs []file.ReferenceVia +func (l *Layer) FilesByMIMETypeFromSquash(mimeTypes ...string) ([]file.ReferenceAccessVia, error) { + var refs []file.ReferenceAccessVia for _, ty := range mimeTypes { refsForType, err := fetchFilesByMIMEType(l.SquashedTree, l.fileCatalog, ty) if err != nil { @@ -181,32 +181,32 @@ func (l *Layer) FilesByMIMETypeFromSquash(mimeTypes ...string) ([]file.Reference } // FilesByExtension returns file references for files that have the given extension. -func (l *Layer) FilesByExtension(extension string) ([]file.ReferenceVia, error) { +func (l *Layer) FilesByExtension(extension string) ([]file.ReferenceAccessVia, error) { return fetchFilesByExtension(l.Tree, l.fileCatalog, extension) } // FilesByExtensionFromSquash returns file references for files have the given extension relative to the squash tree. -func (l *Layer) FilesByExtensionFromSquash(extension string) ([]file.ReferenceVia, error) { +func (l *Layer) FilesByExtensionFromSquash(extension string) ([]file.ReferenceAccessVia, error) { return fetchFilesByExtension(l.SquashedTree, l.fileCatalog, extension) } // FilesByBasename returns file references for files that have the following basename. 
-func (l *Layer) FilesByBasename(basename string) ([]file.ReferenceVia, error) { +func (l *Layer) FilesByBasename(basename string) ([]file.ReferenceAccessVia, error) { return fetchFilesByBasename(l.Tree, l.fileCatalog, basename) } // FilesByBasenameFromSquash returns file references for files by name relative to the squash tree. -func (l *Layer) FilesByBasenameFromSquash(extension string) ([]file.ReferenceVia, error) { +func (l *Layer) FilesByBasenameFromSquash(extension string) ([]file.ReferenceAccessVia, error) { return fetchFilesByBasename(l.SquashedTree, l.fileCatalog, extension) } // FilesByBasenameGlob returns file references for files that have the following basename glob. -func (l *Layer) FilesByBasenameGlob(glob string) ([]file.ReferenceVia, error) { +func (l *Layer) FilesByBasenameGlob(glob string) ([]file.ReferenceAccessVia, error) { return fetchFilesByBasenameGlob(l.Tree, l.fileCatalog, glob) } // FilesByBasenameGlobFromSquash returns file references for files by basename glob pattern relative to the squash tree. 
-func (l *Layer) FilesByBasenameGlobFromSquash(glob string) ([]file.ReferenceVia, error) { +func (l *Layer) FilesByBasenameGlobFromSquash(glob string) ([]file.ReferenceAccessVia, error) { return fetchFilesByBasenameGlob(l.SquashedTree, l.fileCatalog, glob) } diff --git a/test/integration/fixture_image_opaque_directory_test.go b/test/integration/fixture_image_opaque_directory_test.go index cccc4207..2e58fe2b 100644 --- a/test/integration/fixture_image_opaque_directory_test.go +++ b/test/integration/fixture_image_opaque_directory_test.go @@ -17,7 +17,7 @@ func TestImage_SquashedTree_OpaqueDirectoryExistsInFileCatalog(t *testing.T) { t.Fatalf("unable to get file=%q : %+v", path, err) } - _, err = image.FileCatalog.Get(*ref) + _, err = image.FileCatalog.Get(*ref.Reference) if err != nil { t.Fatal(err) } diff --git a/test/integration/fixture_image_symlinks_test.go b/test/integration/fixture_image_symlinks_test.go index 1c2280da..a192396b 100644 --- a/test/integration/fixture_image_symlinks_test.go +++ b/test/integration/fixture_image_symlinks_test.go @@ -128,11 +128,11 @@ func fetchRefs(t *testing.T, i *image.Image, cfg linkFetchConfig) (*file.Referen t.Fatalf("missing expected path: %s", expectedResolve) } - actualResolve, err := i.ResolveLinkByLayerSquash(*link, cfg.perspectiveLayer, cfg.linkOptions...) + actualResolve, err := i.ResolveLinkByLayerSquash(*link.Reference, cfg.perspectiveLayer, cfg.linkOptions...) 
if err != nil { t.Fatalf("failed to resolve link=%+v: %+v", link, err) } - return expectedResolve, actualResolve + return expectedResolve.Reference, actualResolve.Reference } func fetchContents(t *testing.T, i *image.Image, cfg linkFetchConfig) string { From 3583f6b842f8fe435469b7c485db5a6f53e06d5f Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Mon, 30 Jan 2023 14:31:25 -0500 Subject: [PATCH 04/35] add filetree search context Signed-off-by: Alex Goodman --- pkg/file/reference.go | 26 ++ pkg/filetree/filetree.go | 40 +- pkg/filetree/filetree_test.go | 4 +- pkg/filetree/glob.go | 7 - pkg/filetree/glob_parser.go | 235 ++++++++++ pkg/filetree/glob_parser_test.go | 458 +++++++++++++++++++ pkg/filetree/index.go | 234 ++++++++++ pkg/filetree/search.go | 163 +++++++ pkg/image/content_helpers.go | 73 --- pkg/image/file_catalog.go | 222 +-------- pkg/image/file_catalog_test.go | 149 +++--- pkg/image/image.go | 44 +- pkg/image/image_test.go | 9 +- pkg/image/layer.go | 64 +-- test/integration/mime_type_detection_test.go | 2 +- 15 files changed, 1308 insertions(+), 422 deletions(-) create mode 100644 pkg/filetree/glob_parser.go create mode 100644 pkg/filetree/glob_parser_test.go create mode 100644 pkg/filetree/index.go create mode 100644 pkg/filetree/search.go diff --git a/pkg/file/reference.go b/pkg/file/reference.go index a2c0810c..904cd022 100644 --- a/pkg/file/reference.go +++ b/pkg/file/reference.go @@ -2,6 +2,9 @@ package file import ( "fmt" + "sort" + + "github.com/scylladb/go-set/strset" ) var nextID = 0 @@ -28,6 +31,29 @@ func (f *ReferenceAccessVia) HasReference() bool { return f.Reference != nil } +func (f *ReferenceAccessVia) AllPaths() []Path { + set := strset.New() + set.Add(string(f.RequestPath)) + if f.Reference != nil { + set.Add(string(f.Reference.RealPath)) + } + for _, p := range f.LeafLinkResolution { + set.Add(string(p.RequestPath)) + if p.Reference != nil { + set.Add(string(p.Reference.RealPath)) + } + } + + paths := set.List() + sort.Strings(paths) + 
+ var results []Path + for _, p := range paths { + results = append(results, Path(p)) + } + return results +} + // RequestResolutionPath represents the traversal through the filesystem to access to current reference, including all symlink and hardlink resolution. func (f *ReferenceAccessVia) RequestResolutionPath() []Path { var paths []Path diff --git a/pkg/filetree/filetree.go b/pkg/filetree/filetree.go index 420df97c..e85efc1a 100644 --- a/pkg/filetree/filetree.go +++ b/pkg/filetree/filetree.go @@ -18,6 +18,26 @@ import ( var ErrRemovingRoot = errors.New("cannot remove the root path (`/`) from the FileTree") var ErrLinkCycleDetected = errors.New("cycle during symlink resolution") +type Reader interface { + AllRealPaths() []file.Path + AllFiles(types ...file.Type) []file.Reference + ListPaths(dir file.Path) ([]file.Path, error) + File(path file.Path, options ...LinkResolutionOption) (bool, *file.ReferenceAccessVia, error) + Reader() tree.Reader + Equal(other *FileTree) bool + PathDiff(other *FileTree) (extra, missing []file.Path) + Walk(fn func(path file.Path, f filenode.FileNode) error, conditions *WalkConditions) error + HasPath(path file.Path, options ...LinkResolutionOption) bool +} + +type Writer interface { + AddFile(realPath file.Path) (*file.Reference, error) + AddSymLink(realPath file.Path, linkPath file.Path) (*file.Reference, error) + AddHardLink(realPath file.Path, linkPath file.Path) (*file.Reference, error) + AddDir(realPath file.Path) (*file.Reference, error) + RemovePath(path file.Path) error +} + // nodeAccess represents a request into the tree for a specific path and the resulting node, which may have a different path. type nodeAccess struct { RequestPath file.Path @@ -412,8 +432,8 @@ func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, } // FilesByGlob fetches zero to many file.References for the given glob pattern (considers symlinks). 
-func (t *FileTree) FilesByGlob(query string, options ...LinkResolutionOption) ([]GlobResult, error) { - results := make([]GlobResult, 0) +func (t *FileTree) FilesByGlob(query string, options ...LinkResolutionOption) ([]file.ReferenceAccessVia, error) { + results := make([]file.ReferenceAccessVia, 0) if len(query) == 0 { return nil, fmt.Errorf("no glob pattern given") @@ -456,16 +476,14 @@ func (t *FileTree) FilesByGlob(query string, options ...LinkResolutionOption) ([ } // the Node must exist and should not be a directory if fna.HasFileNode() && fna.FileNode.FileType != file.TypeDir { - result := GlobResult{ - MatchPath: matchPath, - RealPath: fna.FileNode.RealPath, - // we should not be given a link Node UNLESS it is dead - IsDeadLink: fna.FileNode.IsLink(), - } - if fna.FileNode.Reference != nil { - result.Reference = *fna.FileNode.Reference + result := file.NewFileReferenceVia( + matchPath, + fna.FileNode.Reference, + newReferenceAccessPath(fna.LeafLinkResolution), + ) + if result != nil { + results = append(results, *result) } - results = append(results, result) } } diff --git a/pkg/filetree/filetree_test.go b/pkg/filetree/filetree_test.go index 3c05d41f..657942bd 100644 --- a/pkg/filetree/filetree_test.go +++ b/pkg/filetree/filetree_test.go @@ -307,7 +307,7 @@ func TestFileTree_FilesByGlob(t *testing.T) { expectedSet := internal.NewStringSet() for _, r := range actual { - actualSet.Add(string(r.MatchPath)) + actualSet.Add(string(r.RequestPath)) } for _, e := range test.expected { @@ -318,7 +318,7 @@ func TestFileTree_FilesByGlob(t *testing.T) { } for _, r := range actual { - if !expectedSet.Contains(string(r.MatchPath)) { + if !expectedSet.Contains(string(r.RequestPath)) { t.Errorf("extra search hit: %+v", r) } } diff --git a/pkg/filetree/glob.go b/pkg/filetree/glob.go index 72611c80..a68fba2e 100644 --- a/pkg/filetree/glob.go +++ b/pkg/filetree/glob.go @@ -19,13 +19,6 @@ var _ fs.FS = (*osAdapter)(nil) var _ fs.FileInfo = (*fileinfoAdapter)(nil) var _ 
fs.DirEntry = (*fileinfoAdapter)(nil) -type GlobResult struct { - MatchPath file.Path - RealPath file.Path - IsDeadLink bool - Reference file.Reference -} - // fileAdapter is an object meant to implement the doublestar.File for getting Lstat results for an entire directory. type fileAdapter struct { os *osAdapter diff --git a/pkg/filetree/glob_parser.go b/pkg/filetree/glob_parser.go new file mode 100644 index 00000000..59cb0a96 --- /dev/null +++ b/pkg/filetree/glob_parser.go @@ -0,0 +1,235 @@ +package filetree + +import ( + "regexp" + "strings" +) + +const ( + searchByGlob searchBasis = iota + searchByPath + searchByExtension + searchByBasename + searchByBasenameGlob +) + +type searchBasis int + +func (s searchBasis) String() string { + switch s { + case searchByGlob: + return "glob" + case searchByPath: + return "path" + case searchByExtension: + return "extension" + case searchByBasename: + return "basename" + case searchByBasenameGlob: + return "basename-glob" + } + return "unknown search basis" +} + +type searchRequest struct { + searchBasis + value string + requirement string +} + +func parseGlob(glob string) searchRequest { + glob = cleanGlob(glob) + + if !strings.ContainsAny(glob, "*?") { + return searchRequest{ + searchBasis: searchByPath, + value: glob, + } + } + + basenameSplitAt := strings.LastIndex(glob, "/") + + var basename string + if basenameSplitAt == -1 { + // note: this has no glob path prefix, thus no requirement... 
+ // this can only be a basename, basename glob, or extension + basename = glob + } else if basenameSplitAt < len(glob)-1 { + basename = glob[basenameSplitAt+1:] + } + + request := parseGlobBasename(basename) + + requirement := glob + if basenameSplitAt == -1 { + requirement = "" + } else if basenameSplitAt < len(glob)-1 { + requirementSection := glob[:basenameSplitAt] + switch requirementSection { + case "**", request.requirement: + requirement = "" + } + } + + request.requirement = requirement + + if request.searchBasis == searchByGlob { + request.value = glob + if glob == request.requirement { + request.requirement = "" + } + } + + return request +} + +func parseGlobBasename(input string) searchRequest { + extensionFields := strings.Split(input, "*.") + if len(extensionFields) == 2 && extensionFields[0] == "" { + possibleExtension := extensionFields[1] + if !strings.ContainsAny(possibleExtension, "*?") { + // special case, this is plain extension + return searchRequest{ + searchBasis: searchByExtension, + value: "." + possibleExtension, + } + } + } + + if !strings.ContainsAny(input, "*?") { + // special case, this is plain extension + return searchRequest{ + searchBasis: searchByBasename, + value: input, + } + } + + if strings.ReplaceAll(strings.ReplaceAll(input, "?", ""), "*", "") == "" { + // special case, this is a glob that is only asterisks... do not process! + return searchRequest{ + searchBasis: searchByGlob, + } + } + + return searchRequest{ + searchBasis: searchByBasenameGlob, + value: input, + } +} + +func cleanGlob(glob string) string { + glob = strings.TrimSpace(glob) + glob = removeRedundantCountGlob(glob, '/', 1) + glob = removeRedundantCountGlob(glob, '*', 2) + if len(glob) > 1 { + // input case: / + // then preserve the slash + glob = strings.TrimRight(glob, "/") + } + // e.g. 
replace "/bar**/" with "/bar*/" + glob = simplifyMultipleGlobAsterisks(glob) + glob = simplifyGlobRecursion(glob) + return glob +} + +func simplifyMultipleGlobAsterisks(glob string) string { + // this will replace any recursive globs (**) that are not clearly indicating recursive tree searches with a single * + + var sb strings.Builder + var asteriskBuff strings.Builder + var withinRecursiveStreak bool + + for idx, c := range glob { + isAsterisk := c == '*' + isSlash := c == '/' + + // special case, this is the first character in the glob and it is an asterisk... + // treat this like a recursive streak + if idx == 0 && isAsterisk { + withinRecursiveStreak = true + asteriskBuff.WriteRune(c) + continue + } + + if isAsterisk { + asteriskBuff.WriteRune(c) + continue + } + + if isSlash { + if withinRecursiveStreak { + // this is a confirmed recursive streak + // keep all asterisks! + sb.WriteString(asteriskBuff.String()) + asteriskBuff.Reset() + } + + if asteriskBuff.Len() > 0 { + // this is NOT a recursive streak, but there are asterisks + // keep only one asterisk + sb.WriteRune('*') + asteriskBuff.Reset() + } + + // this is potentially a new streak... + withinRecursiveStreak = true + } else { + // ... and this is NOT a recursive streak + if asteriskBuff.Len() > 0 { + // ... keep only one asterisk, since it's not recursive + sb.WriteRune('*') + } + asteriskBuff.Reset() + withinRecursiveStreak = false + } + + sb.WriteRune(c) + } + + if asteriskBuff.Len() > 0 { + if withinRecursiveStreak { + sb.WriteString(asteriskBuff.String()) + } else { + sb.WriteRune('*') + } + } + + return sb.String() +} + +var globRecursionRightPattern = regexp.MustCompile(`(\*\*/?)+`) + +func simplifyGlobRecursion(glob string) string { + // this function assumes that all redundant asterisks have been removed (e.g. /****/ -> /**/) + // and that all seemingly recursive globs have been replaced with a single asterisk (e.g. 
/bar**/ -> /bar*/) + glob = globRecursionRightPattern.ReplaceAllString(glob, "**/") + glob = strings.ReplaceAll(glob, "//", "/") + if strings.HasPrefix(glob, "/**/") { + glob = strings.TrimPrefix(glob, "/") + } + if len(glob) > 1 { + // input case: /** + // then preserve the slash + glob = strings.TrimRight(glob, "/") + } + return glob +} + +func removeRedundantCountGlob(glob string, val rune, count int) string { + var sb strings.Builder + + var streak int + for _, c := range glob { + if c == val { + streak++ + if streak > count { + continue + } + } else { + streak = 0 + } + + sb.WriteRune(c) + } + return sb.String() +} diff --git a/pkg/filetree/glob_parser_test.go b/pkg/filetree/glob_parser_test.go new file mode 100644 index 00000000..53ea0208 --- /dev/null +++ b/pkg/filetree/glob_parser_test.go @@ -0,0 +1,458 @@ +package filetree + +import ( + "github.com/stretchr/testify/assert" + "testing" +) + +func Test_parseGlob(t *testing.T) { + + tests := []struct { + name string + glob string + want searchRequest + }{ + { + name: "relative path", + glob: "foo/bar/basename.txt", + want: searchRequest{ + searchBasis: searchByPath, + value: "foo/bar/basename.txt", + }, + }, + { + name: "absolute path", + glob: "/foo/bar/basename.txt", + want: searchRequest{ + searchBasis: searchByPath, + value: "/foo/bar/basename.txt", + }, + }, + { + name: "extension", + glob: "*.txt", + want: searchRequest{ + searchBasis: searchByExtension, + value: ".txt", + }, + }, + { + name: "extension anywhere", + glob: "**/*.txt", + want: searchRequest{ + searchBasis: searchByExtension, + value: ".txt", + }, + }, + { + name: "basename glob search with requirement", + glob: "bas*nam?.txt", + want: searchRequest{ + searchBasis: searchByBasenameGlob, + value: "bas*nam?.txt", + }, + }, + { + name: "extension with path requirement", + glob: "foo/bar/**/*.txt", + want: searchRequest{ + searchBasis: searchByExtension, + value: ".txt", + requirement: "foo/bar/**/*.txt", + }, + }, + { + name: "basename but 
without a path prefix", + glob: "basename.txt", + want: searchRequest{ + searchBasis: searchByPath, + value: "basename.txt", + }, + }, + { + name: "basename anywhere", + glob: "**/basename.txt", + want: searchRequest{ + searchBasis: searchByBasename, + value: "basename.txt", + }, + }, + { + name: "basename with requirement", + glob: "foo/b*/basename.txt", + want: searchRequest{ + searchBasis: searchByBasename, + value: "basename.txt", + requirement: "foo/b*/basename.txt", + }, + }, + { + name: "basename glob", + glob: "basename.*", + want: searchRequest{ + searchBasis: searchByBasenameGlob, + value: "basename.*", + }, + }, + { + name: "basename glob with requirement", + glob: "**/foo/bar/basename.*", + want: searchRequest{ + searchBasis: searchByBasenameGlob, + value: "basename.*", + requirement: "**/foo/bar/basename.*", + }, + }, + { + name: "basename wildcard glob with requirement", + glob: "**/foo/bar/basenam?.txt", + want: searchRequest{ + searchBasis: searchByBasenameGlob, + value: "basenam?.txt", + requirement: "**/foo/bar/basenam?.txt", + }, + }, + { + name: "fallback to full glob search", + glob: "**/foo/bar/*", + want: searchRequest{ + searchBasis: searchByGlob, + value: "**/foo/bar/*", + }, + }, + // edge cases + { + name: "empty string", + glob: "", + want: searchRequest{ + searchBasis: searchByPath, + }, + }, + { + name: "only a slash", + glob: "/", + want: searchRequest{ + searchBasis: searchByPath, + value: "/", + }, + }, + { + name: "cleanup to single slash", + glob: "///", + want: searchRequest{ + searchBasis: searchByPath, + value: "/", + }, + }, + { + name: "ends with slash", + glob: "/foo/b*r/", + want: searchRequest{ + searchBasis: searchByBasenameGlob, + value: "b*r", + requirement: "/foo/b*r", // note that the slash is removed since this should be a clean path + }, + }, + { + name: "spaces around everything", + glob: " /foo/b*r/ .txt ", + want: searchRequest{ + searchBasis: searchByBasename, + value: " .txt", // note the space + requirement: 
"/foo/b*r/ .txt", // note the space in the middle, but otherwise clean on the front and back + }, + }, + { + name: "fallback to full glob search", + glob: "**/foo/bar/***.*****.******", + want: searchRequest{ + searchBasis: searchByBasenameGlob, + value: "*.*.*", // note that the basename glob is cleaned up + requirement: "**/foo/bar/*.*.*", // note that the glob is cleaned up + }, + }, + { + name: "odd glob input still honors basename searches", + glob: "**/foo/**.***.****bar/***thin*.txt", + want: searchRequest{ + searchBasis: searchByBasenameGlob, + value: "*thin*.txt", // note that the basename glob is cleaned up + requirement: "**/foo/*.*.*bar/*thin*.txt", // note that the glob is cleaned up + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equalf(t, tt.want, parseGlob(tt.glob), "parseGlob(%v)", tt.glob) + }) + } +} + +func Test_parseGlobBasename(t *testing.T) { + tests := []struct { + name string + input string + want searchRequest + }{ + { + name: "empty string", + input: "", + want: searchRequest{ + searchBasis: searchByBasename, + }, + }, + { + name: "everything-ish", + input: "*?", + want: searchRequest{ + searchBasis: searchByGlob, + }, + }, + { + name: "everything recursive", + input: "**", + want: searchRequest{ + searchBasis: searchByGlob, + }, + }, + { + name: "simple basename", + input: "basename.txt", + want: searchRequest{ + searchBasis: searchByBasename, + value: "basename.txt", + }, + }, + { + name: "basename with prefix glob", + input: "*basename.txt", + want: searchRequest{ + searchBasis: searchByBasenameGlob, + value: "*basename.txt", + }, + }, + { + name: "basename with pattern", + input: "bas*nam?.txt", + want: searchRequest{ + searchBasis: searchByBasenameGlob, + value: "bas*nam?.txt", + }, + }, + { + name: "extension", + input: "*.txt", + want: searchRequest{ + searchBasis: searchByExtension, + value: ".txt", + }, + }, + { + name: "possible extension that should be searched by glob", + input: 
"*.*.txt", + want: searchRequest{ + searchBasis: searchByBasenameGlob, + value: "*.*.txt", + }, + }, + { + name: "tricky basename", + input: ".txt", + want: searchRequest{ + searchBasis: searchByBasename, + value: ".txt", + }, + }, + { + name: "basename glob with extension", + input: "*thin*.txt", + want: searchRequest{ + searchBasis: searchByBasenameGlob, + value: "*thin*.txt", + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equalf(t, tt.want, parseGlobBasename(tt.input), "parseGlobBasename(%v)", tt.input) + }) + } +} + +func Test_cleanGlob(t *testing.T) { + tests := []struct { + name string + glob string + want string + }{ + { + name: "empty string", + glob: "", + want: "", + }, + { + name: "remove spaces from glob edges", + glob: " **/foo/ **/ bar.txt ", + want: "**/foo/ */ bar.txt", + }, + { + name: "simplify slashes", + glob: "///foo/////**///**////", + want: "/foo/**", + }, + { + name: "simplify larger recursive glob", + glob: "**/foo/**/*/***/*bar**/***.*****.******", + want: "**/foo/**/*/**/*bar*/*.*.*", + }, + { + name: "simplify glob prefix", + glob: "***/foo.txt", + want: "**/foo.txt", + }, + { + name: "simplify glob within multiple path", + glob: "bar**/ba**r*/***/**/bar***/**/foo.txt", + want: "bar*/ba*r*/**/bar*/**/foo.txt", + }, + { + name: "simplify prefix and suffix glob", + glob: "***/foo/**/****", + want: "**/foo/**", + }, + { + name: "simplify multiple recursive requests", + glob: "/**/**/foo/**/**", + want: "**/foo/**", + }, + { + name: "simplify slashes and asterisks", + glob: "/***/****///foo/**//****////", + want: "**/foo/**", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equalf(t, tt.want, cleanGlob(tt.glob), "cleanGlob(%v)", tt.glob) + }) + } +} + +func Test_removeRedundantCountGlob(t *testing.T) { + type args struct { + glob string + val rune + count int + } + tests := []struct { + name string + args args + want string + }{ + { + name: "empty string", + args: 
args{ + glob: "", + val: '*', + count: 1, + }, + want: "", + }, + { + name: "simplify on edges and body", + args: args{ + glob: "**/foo/***/****", + val: '*', + count: 2, + }, + want: "**/foo/**/**", + }, + { + name: "simplify slashes", + args: args{ + glob: "///something/**///here?/*/will//work///", + val: '/', + count: 1, + }, + want: "/something/**/here?/*/will/work/", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equalf(t, tt.want, removeRedundantCountGlob(tt.args.glob, tt.args.val, tt.args.count), "removeRedundantCountGlob(%v, %v, %v)", tt.args.glob, tt.args.val, tt.args.count) + }) + } +} + +func Test_simplifyMultipleGlobAsterisks(t *testing.T) { + tests := []struct { + name string + glob string + want string + }{ + { + name: "simplify glob suffix", + glob: "foo/.***", + want: "foo/.*", + }, + { + name: "simplify glob within path", + glob: "**/bar**/foo.txt", + want: "**/bar*/foo.txt", + }, + { + name: "simplify glob within multiple path", + glob: "bar**/ba**r*/**/**/bar**/**/foo.txt", + want: "bar*/ba*r*/**/**/bar*/**/foo.txt", + }, + { + name: "simplify glob within path prefix", + glob: "bar**/foo.txt", + want: "bar*/foo.txt", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equalf(t, tt.want, simplifyMultipleGlobAsterisks(tt.glob), "simplifyMultipleGlobAsterisks(%v)", tt.glob) + }) + } +} + +func Test_simplifyGlobRecursion(t *testing.T) { + tests := []struct { + name string + glob string + want string + }{ + { + name: "single instance with slash prefix", + glob: "/**", + want: "**", + }, + { + name: "single instance with slash suffix", + glob: "**/", + want: "**", + }, + { + name: "no slash prefix", + glob: "**/**/fo*o/**/**", + want: "**/fo*o/**", + }, + { + name: "within body", + glob: "/fo*o/**/**/bar", + want: "/fo*o/**/bar", + }, + { + name: "with slash prefix", + glob: "/**/**/foo/**/**", + want: "**/foo/**", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t 
*testing.T) { + assert.Equalf(t, tt.want, simplifyGlobRecursion(tt.glob), "simplifyGlobRecursion(%v)", tt.glob) + }) + } +} diff --git a/pkg/filetree/index.go b/pkg/filetree/index.go new file mode 100644 index 00000000..c32ab0d5 --- /dev/null +++ b/pkg/filetree/index.go @@ -0,0 +1,234 @@ +package filetree + +import ( + "fmt" + "os" + "path" + "strings" + "sync" + + "github.com/anchore/stereoscope/pkg/file" + "github.com/becheran/wildmatch-go" + "github.com/scylladb/go-set/strset" +) + +type Index interface { + IndexReader + IndexWriter +} + +type IndexReader interface { + Exists(f file.Reference) bool + Get(f file.Reference) (IndexEntry, error) + GetByMIMEType(mType string) ([]IndexEntry, error) + GetByExtension(extension string) ([]IndexEntry, error) + GetByBasename(basename string) ([]IndexEntry, error) + GetByBasenameGlob(globs ...string) ([]IndexEntry, error) + Basenames() []string +} + +type IndexWriter interface { + Add(f file.Reference, m file.Metadata) +} + +// Index represents all file metadata and source tracing for all files contained within the image layer +// blobs (i.e. everything except for the image index/manifest/metadata files). +type index struct { + *sync.RWMutex + index map[file.ID]IndexEntry + byMIMEType map[string][]file.ID + byExtension map[string][]file.ID + byBasename map[string][]file.ID + basenames *strset.Set +} + +// NewIndex returns an empty Index. +func NewIndex() Index { + return &index{ + RWMutex: &sync.RWMutex{}, + index: make(map[file.ID]IndexEntry), + byMIMEType: make(map[string][]file.ID), + byExtension: make(map[string][]file.ID), + byBasename: make(map[string][]file.ID), + basenames: strset.New(), + } +} + +// IndexEntry represents all stored metadata for a single file reference. +type IndexEntry struct { + file.Reference + file.Metadata +} + +// Add creates a new IndexEntry for the given file reference and metadata, cataloged by the ID of the +// file reference (overwriting any existing entries without warning). 
+func (c *index) Add(f file.Reference, m file.Metadata) { + c.Lock() + defer c.Unlock() + id := f.ID() + + if m.MIMEType != "" { + // an empty MIME type means that we didn't have the contents of the file to determine the MIME type. If we have + // the contents and the MIME type could not be determined then the default value is application/octet-stream. + c.byMIMEType[m.MIMEType] = append(c.byMIMEType[m.MIMEType], id) + } + + basename := path.Base(string(f.RealPath)) + c.byBasename[basename] = append(c.byBasename[basename], id) + c.basenames.Add(basename) + + for _, ext := range fileExtensions(string(f.RealPath)) { + c.byExtension[ext] = append(c.byExtension[ext], id) + } + + c.index[id] = IndexEntry{ + Reference: f, + Metadata: m, + } +} + +// Exists indicates if the given file reference exists in the index. +func (c *index) Exists(f file.Reference) bool { + c.RLock() + defer c.RUnlock() + _, ok := c.index[f.ID()] + return ok +} + +// Get fetches a IndexEntry for the given file reference, or returns an error if the file reference has not +// been added to the index. 
+func (c *index) Get(f file.Reference) (IndexEntry, error) { + c.RLock() + defer c.RUnlock() + value, ok := c.index[f.ID()] + if !ok { + return IndexEntry{}, os.ErrNotExist + } + return value, nil +} + +func (c *index) Basenames() []string { + c.RLock() + defer c.RUnlock() + + return c.basenames.List() +} + +func (c *index) GetByMIMEType(mType string) ([]IndexEntry, error) { + c.RLock() + defer c.RUnlock() + + fileIDs, ok := c.byMIMEType[mType] + if !ok { + return nil, nil + } + + var entries []IndexEntry + for _, id := range fileIDs { + entry, ok := c.index[id] + if !ok { + return nil, os.ErrNotExist + } + entries = append(entries, entry) + } + + return entries, nil +} + +func (c *index) GetByExtension(extension string) ([]IndexEntry, error) { + c.RLock() + defer c.RUnlock() + + fileIDs, ok := c.byExtension[extension] + if !ok { + return nil, nil + } + + var entries []IndexEntry + for _, id := range fileIDs { + entry, ok := c.index[id] + if !ok { + return nil, os.ErrNotExist + } + entries = append(entries, entry) + } + + return entries, nil +} + +func (c *index) GetByBasename(basename string) ([]IndexEntry, error) { + c.RLock() + defer c.RUnlock() + + if strings.Contains(basename, "/") { + return nil, fmt.Errorf("found directory separator in a basename") + } + + fileIDs, ok := c.byBasename[basename] + if !ok { + return nil, nil + } + + var entries []IndexEntry + for _, id := range fileIDs { + entry, ok := c.index[id] + if !ok { + return nil, os.ErrNotExist + } + entries = append(entries, entry) + } + + return entries, nil +} + +func (c *index) GetByBasenameGlob(globs ...string) ([]IndexEntry, error) { + c.RLock() + defer c.RUnlock() + + var entries []IndexEntry + for _, glob := range globs { + if strings.Contains(glob, "**") { + return nil, fmt.Errorf("basename glob patterns with '**' are not supported") + } + if strings.Contains(glob, "/") { + return nil, fmt.Errorf("found directory separator in a basename") + } + + patternObj := wildmatch.NewWildMatch(glob) + 
for _, b := range c.Basenames() { + if patternObj.IsMatch(b) { + bns, err := c.GetByBasename(b) + if err != nil { + return nil, fmt.Errorf("unable to fetch file references by basename (%q): %w", b, err) + } + entries = append(entries, bns...) + } + } + } + + return entries, nil +} + +func fileExtensions(p string) []string { + var exts []string + p = strings.TrimSpace(p) + + // ignore oddities + if strings.HasSuffix(p, ".") { + return exts + } + + // ignore directories + if strings.HasSuffix(p, "/") { + return exts + } + + // ignore . which indicate a hidden file + p = strings.TrimLeft(path.Base(p), ".") + for i := len(p) - 1; i >= 0; i-- { + if p[i] == '.' { + exts = append(exts, p[i:]) + } + } + return exts +} diff --git a/pkg/filetree/search.go b/pkg/filetree/search.go new file mode 100644 index 00000000..7089f6e7 --- /dev/null +++ b/pkg/filetree/search.go @@ -0,0 +1,163 @@ +package filetree + +import ( + "fmt" + + "github.com/anchore/stereoscope/pkg/file" + "github.com/bmatcuk/doublestar/v4" +) + +// Searcher is a facade for searching a file tree with optional indexing support. 
+type Searcher interface { + SearchByPath(path string, options ...LinkResolutionOption) (*file.ReferenceAccessVia, error) + SearchByGlob(patterns string, options ...LinkResolutionOption) ([]file.ReferenceAccessVia, error) + SearchByMIMEType(mimeTypes ...string) ([]file.ReferenceAccessVia, error) +} + +type searchContext struct { + tree *FileTree // this is the tree which all index search results are filtered against + index Index // this index is relative to one or more trees, not just necessarily one +} + +func NewSearchContext(tree *FileTree, index Index) Searcher { + return &searchContext{ + tree: tree, + index: index, + } +} + +func (i searchContext) SearchByPath(path string, options ...LinkResolutionOption) (*file.ReferenceAccessVia, error) { + // TODO: one day this could leverage indexes outside of the tree, but today this is not implemented + options = append(options, FollowBasenameLinks) + _, ref, err := i.tree.File(file.Path(path), options...) + return ref, err +} + +func (i searchContext) SearchByMIMEType(mimeTypes ...string) ([]file.ReferenceAccessVia, error) { + var fileEntries []IndexEntry + + for _, mType := range mimeTypes { + entries, err := i.index.GetByMIMEType(mType) + if err != nil { + return nil, fmt.Errorf("unable to fetch file references by MIME type (%q): %w", mType, err) + } + fileEntries = append(fileEntries, entries...) + } + + return i.filterIndexEntriesRelativeToTree(fileEntries) +} + +func (i searchContext) SearchByGlob(pattern string, options ...LinkResolutionOption) ([]file.ReferenceAccessVia, error) { + if i.index == nil { + options = append(options, FollowBasenameLinks) + return i.tree.FilesByGlob(pattern, options...) + } + + return i.searchByGlob(parseGlob(pattern), options...) 
+} + +func (i searchContext) searchByGlob(request searchRequest, options ...LinkResolutionOption) ([]file.ReferenceAccessVia, error) { + switch request.searchBasis { + case searchByPath: + options = append(options, FollowBasenameLinks) + ref, err := i.SearchByPath(request.value, options...) + if err != nil { + return nil, err + } + if ref == nil { + return nil, nil + } + return []file.ReferenceAccessVia{*ref}, nil + case searchByBasename: + indexes, err := i.index.GetByBasename(request.value) + if err != nil { + return nil, fmt.Errorf("unable to search by basename=%q: %w", request.value, err) + } + refs, err := i.filterIndexEntries(request.requirement, indexes) + if err != nil { + return nil, err + } + return refs, nil + case searchByBasenameGlob: + indexes, err := i.index.GetByBasenameGlob(request.value) + if err != nil { + return nil, fmt.Errorf("unable to search by basename-glob=%q: %w", request.value, err) + } + refs, err := i.filterIndexEntries(request.requirement, indexes) + if err != nil { + return nil, err + } + return refs, nil + case searchByExtension: + indexes, err := i.index.GetByExtension(request.value) + if err != nil { + return nil, fmt.Errorf("unable to search by extension=%q: %w", request.value, err) + } + refs, err := i.filterIndexEntries(request.requirement, indexes) + if err != nil { + return nil, err + } + return refs, nil + case searchByGlob: + options = append(options, FollowBasenameLinks) + return i.tree.FilesByGlob(request.value, options...) 
+ } + + return nil, fmt.Errorf("invalid search request: %+v", request.searchBasis) +} + +func (i searchContext) filterIndexEntries(requirement string, entries []IndexEntry) ([]file.ReferenceAccessVia, error) { + refs, err := i.filterIndexEntriesRelativeToTree(entries) + if err != nil { + return nil, err + } + + var results []file.ReferenceAccessVia + for _, ref := range refs { + if requirement != "" { + var foundMatchingRequirement bool + for _, p := range ref.AllPaths() { + matched, err := doublestar.Match(requirement, string(p)) + if err != nil { + return nil, fmt.Errorf("unable to match glob pattern=%q to path=%q: %w", requirement, p, err) + } + if matched { + foundMatchingRequirement = true + break + } + } + if !foundMatchingRequirement { + continue + } + } + results = append(results, ref) + } + + return results, nil +} + +func (i searchContext) filterIndexEntriesRelativeToTree(fileEntries []IndexEntry) ([]file.ReferenceAccessVia, error) { + var refs []file.ReferenceAccessVia +allFileEntries: + for _, entry := range fileEntries { + _, ref, err := i.tree.File(entry.Reference.RealPath, FollowBasenameLinks) + if err != nil { + return nil, fmt.Errorf("unable to get ref for path=%q: %w", entry.Reference.RealPath, err) + } + + if !ref.HasReference() { + continue + } + + for _, accessRef := range ref.ResolutionReferences() { + if accessRef.ID() == entry.Reference.ID() { + // we know this entry exists in the tree, keep track of the reference for this file + refs = append(refs, *ref) + continue allFileEntries + } + } + + // we did not find a matching file ID in the tree, so drop this entry + } + return refs, nil +} diff --git a/pkg/image/content_helpers.go b/pkg/image/content_helpers.go index c91b044b..92728f93 100644 --- a/pkg/image/content_helpers.go +++ b/pkg/image/content_helpers.go @@ -25,76 +25,3 @@ func fetchFileContentsByPath(ft *filetree.FileTree, fileCatalog *FileCatalog, pa } return reader, nil } - -// fetchFileContentsByPath is a common helper function for 
resolving file references for a MIME type from the file -// catalog relative to the given tree. -func fetchFilesByMIMEType(ft *filetree.FileTree, fileCatalog *FileCatalog, mType string) ([]file.ReferenceAccessVia, error) { - fileEntries, err := fileCatalog.GetByMIMEType(mType) - if err != nil { - return nil, fmt.Errorf("unable to fetch file references by MIME type (%q): %w", mType, err) - } - - // since this query is related to the contents of the path, this should be a strict file ID match - return filterCatalogFilesRelativesToTree(ft, fileEntries, filetree.FollowBasenameLinks) -} - -// fetchFilesByExtension is a common helper function for resolving file references for a file extension from the file -// catalog relative to the given tree. -func fetchFilesByExtension(ft *filetree.FileTree, fileCatalog *FileCatalog, extension string) ([]file.ReferenceAccessVia, error) { - fileEntries, err := fileCatalog.GetByExtension(extension) - if err != nil { - return nil, fmt.Errorf("unable to fetch file references by extension (%q): %w", extension, err) - } - - return filterCatalogFilesRelativesToTree(ft, fileEntries, filetree.FollowBasenameLinks) -} - -// fetchFilesByBasename is a common helper function for resolving file references for a file basename -// catalog relative to the given tree. -func fetchFilesByBasename(ft *filetree.FileTree, fileCatalog *FileCatalog, basename string) ([]file.ReferenceAccessVia, error) { - fileEntries, err := fileCatalog.GetByBasename(basename) - if err != nil { - return nil, fmt.Errorf("unable to fetch file references by basename (%q): %w", basename, err) - } - - return filterCatalogFilesRelativesToTree(ft, fileEntries, filetree.FollowBasenameLinks) -} - -// fetchFilesByBasenameGlob is a common helper function for resolving file references for a file basename glob pattern -// catalog relative to the given tree. 
-func fetchFilesByBasenameGlob(ft *filetree.FileTree, fileCatalog *FileCatalog, basenameGlobs ...string) ([]file.ReferenceAccessVia, error) { - fileEntries, err := fileCatalog.GetByBasenameGlob(basenameGlobs...) - if err != nil { - return nil, fmt.Errorf("unable to fetch file references by basename glob (%q): %w", basenameGlobs, err) - } - - return filterCatalogFilesRelativesToTree(ft, fileEntries, filetree.FollowBasenameLinks) -} - -func filterCatalogFilesRelativesToTree(ft *filetree.FileTree, fileEntries []FileCatalogEntry, linkResolutionOpts ...filetree.LinkResolutionOption) ([]file.ReferenceAccessVia, error) { - var refs []file.ReferenceAccessVia -allFileEntries: - for _, entry := range fileEntries { - _, ref, err := ft.File(entry.File.RealPath, linkResolutionOpts...) - if err != nil { - return nil, fmt.Errorf("unable to get ref for path=%q: %w", entry.File.RealPath, err) - } - - // TODO: alex think if this is correct - // if !ref.HasReference() { - if ref == nil { - continue - } - - for _, accessRef := range ref.ResolutionReferences() { - if accessRef.ID() == entry.File.ID() { - // we know this entry exists in the tree, keep track of the reference for this file - refs = append(refs, *ref) - continue allFileEntries - } - } - - // we did not find a matching file ID in the tree, so drop this entry - } - return refs, nil -} diff --git a/pkg/image/file_catalog.go b/pkg/image/file_catalog.go index cc207acc..f9ccd713 100644 --- a/pkg/image/file_catalog.go +++ b/pkg/image/file_catalog.go @@ -3,206 +3,47 @@ package image import ( "fmt" "io" - "path" - "sort" - "strings" "sync" - "github.com/scylladb/go-set/strset" - - "github.com/becheran/wildmatch-go" - "github.com/anchore/stereoscope/pkg/file" + "github.com/anchore/stereoscope/pkg/filetree" ) -var ErrFileNotFound = fmt.Errorf("could not find file") - // FileCatalog represents all file metadata and source tracing for all files contained within the image layer // blobs (i.e. 
everything except for the image index/manifest/metadata files). type FileCatalog struct { - sync.RWMutex - catalog map[file.ID]FileCatalogEntry - byMIMEType map[string][]file.ID - byExtension map[string][]file.ID - byBasename map[string][]file.ID - basenames *strset.Set -} - -// FileCatalogEntry represents all stored metadata for a single file reference. -type FileCatalogEntry struct { - File file.Reference - Metadata file.Metadata - Layer *Layer - Contents file.Opener + *sync.RWMutex + filetree.Index + layerByID map[file.ID]*Layer + openerByID map[file.ID]file.Opener } // NewFileCatalog returns an empty FileCatalog. -func NewFileCatalog() FileCatalog { - return FileCatalog{ - catalog: make(map[file.ID]FileCatalogEntry), - byMIMEType: make(map[string][]file.ID), - byExtension: make(map[string][]file.ID), - byBasename: make(map[string][]file.ID), - basenames: strset.New(), +func NewFileCatalog() *FileCatalog { + return &FileCatalog{ + RWMutex: &sync.RWMutex{}, + Index: filetree.NewIndex(), + layerByID: make(map[file.ID]*Layer), + openerByID: make(map[file.ID]file.Opener), } } // Add creates a new FileCatalogEntry for the given file reference and metadata, cataloged by the ID of the // file reference (overwriting any existing entries without warning). func (c *FileCatalog) Add(f file.Reference, m file.Metadata, l *Layer, opener file.Opener) { + c.Index.Add(f, m) c.Lock() defer c.Unlock() id := f.ID() - - if m.MIMEType != "" { - // an empty MIME type means that we didn't have the contents of the file to determine the MIME type. If we have - // the contents and the MIME type could not be determined then the default value is application/octet-stream. 
- c.byMIMEType[m.MIMEType] = append(c.byMIMEType[m.MIMEType], id) - } - - basename := path.Base(string(f.RealPath)) - c.byBasename[basename] = append(c.byBasename[basename], id) - c.basenames.Add(basename) - - // fmt.Println("Adding file to catalog: ", f.RealPath, " (", id, ")") - for _, ext := range fileExtensions(string(f.RealPath)) { - c.byExtension[ext] = append(c.byExtension[ext], id) - // fmt.Println(" Extensions ("+ext+"): ", c.byExtension[ext]) - } - - c.catalog[id] = FileCatalogEntry{ - File: f, - Metadata: m, - Layer: l, - Contents: opener, - } + c.layerByID[id] = l + c.openerByID[id] = opener } -// Exists indicates if the given file reference exists in the catalog. -func (c *FileCatalog) Exists(f file.Reference) bool { +func (c *FileCatalog) Layer(f file.Reference) *Layer { c.RLock() defer c.RUnlock() - _, ok := c.catalog[f.ID()] - return ok -} -// Get fetches a FileCatalogEntry for the given file reference, or returns an error if the file reference has not -// been added to the catalog. 
-func (c *FileCatalog) Get(f file.Reference) (FileCatalogEntry, error) { - c.RLock() - defer c.RUnlock() - value, ok := c.catalog[f.ID()] - if !ok { - return FileCatalogEntry{}, ErrFileNotFound - } - return value, nil -} - -func (c *FileCatalog) Basenames() []string { - c.RLock() - defer c.RUnlock() - - bns := c.basenames.List() - sort.Strings(bns) - return bns -} - -func (c *FileCatalog) GetByMIMEType(mType string) ([]FileCatalogEntry, error) { - c.RLock() - defer c.RUnlock() - - fileIDs, ok := c.byMIMEType[mType] - if !ok { - return nil, nil - } - - var entries []FileCatalogEntry - for _, id := range fileIDs { - entry, ok := c.catalog[id] - if !ok { - return nil, ErrFileNotFound - } - entries = append(entries, entry) - } - - return entries, nil -} - -func (c *FileCatalog) GetByExtension(extension string) ([]FileCatalogEntry, error) { - c.RLock() - defer c.RUnlock() - - fileIDs, ok := c.byExtension[extension] - if !ok { - return nil, nil - } - - var entries []FileCatalogEntry - for _, id := range fileIDs { - entry, ok := c.catalog[id] - if !ok { - return nil, ErrFileNotFound - } - entries = append(entries, entry) - } - - return entries, nil -} - -func (c *FileCatalog) GetByBasename(basename string) ([]FileCatalogEntry, error) { - c.RLock() - defer c.RUnlock() - - if strings.Contains(basename, "/") { - return nil, fmt.Errorf("found directory separator in a basename") - } - - fileIDs, ok := c.byBasename[basename] - if !ok { - return nil, nil - } - - var entries []FileCatalogEntry - for _, id := range fileIDs { - entry, ok := c.catalog[id] - if !ok { - return nil, ErrFileNotFound - } - entries = append(entries, entry) - } - - return entries, nil -} - -func (c *FileCatalog) GetByBasenameGlob(globs ...string) ([]FileCatalogEntry, error) { - c.RLock() - defer c.RUnlock() - - var fileEntries []FileCatalogEntry - basenames := c.Basenames() - - for _, glob := range globs { - if strings.Contains(glob, "**") { - return nil, fmt.Errorf("basename glob patterns with '**' are 
not supported") - } - if strings.Contains(glob, "/") { - return nil, fmt.Errorf("found directory separator in a basename") - } - - patternObj := wildmatch.NewWildMatch(glob) - - for _, b := range basenames { - if patternObj.IsMatch(b) { - bns, err := c.GetByBasename(b) - if err != nil { - return nil, fmt.Errorf("unable to fetch file references by basename (%q): %w", b, err) - } - fileEntries = append(fileEntries, bns...) - } - } - } - - return fileEntries, nil + return c.layerByID[f.ID()] } // FileContents reads the file contents for the given file reference from the underlying image/layer blob. An error @@ -210,38 +51,15 @@ func (c *FileCatalog) GetByBasenameGlob(globs ...string) ([]FileCatalogEntry, er func (c *FileCatalog) FileContents(f file.Reference) (io.ReadCloser, error) { c.RLock() defer c.RUnlock() - catalogEntry, ok := c.catalog[f.ID()] + + opener, ok := c.openerByID[f.ID()] if !ok { return nil, fmt.Errorf("could not find file: %+v", f.RealPath) } - if catalogEntry.Contents == nil { + if opener == nil { return nil, fmt.Errorf("no contents available for file: %+v", f.RealPath) } - return catalogEntry.Contents(), nil -} - -func fileExtensions(p string) []string { - var exts []string - p = strings.TrimSpace(p) - - // ignore oddities - if strings.HasSuffix(p, ".") { - return exts - } - - // ignore directories - if strings.HasSuffix(p, "/") { - return exts - } - - // ignore . which indicate a hidden file - p = strings.TrimLeft(path.Base(p), ".") - for i := len(p) - 1; i >= 0; i-- { - if p[i] == '.' 
{ - exts = append(exts, p[i:]) - } - } - return exts + return opener(), nil } diff --git a/pkg/image/file_catalog_test.go b/pkg/image/file_catalog_test.go index 75061150..b32a142a 100644 --- a/pkg/image/file_catalog_test.go +++ b/pkg/image/file_catalog_test.go @@ -16,6 +16,7 @@ import ( "os/exec" "path" "path/filepath" + "strings" "testing" "github.com/go-test/deep" @@ -65,10 +66,9 @@ func TestFileCatalog_Add(t *testing.T) { catalog := NewFileCatalog() catalog.Add(*ref, metadata, layer, nil) - expected := FileCatalogEntry{ - File: *ref, - Metadata: metadata, - Layer: layer, + expected := filetree.IndexEntry{ + Reference: *ref, + Metadata: metadata, } actual, err := catalog.Get(*ref) @@ -79,6 +79,8 @@ func TestFileCatalog_Add(t *testing.T) { for d := range deep.Equal(expected, actual) { t.Errorf("diff: %+v", d) } + + assert.Equal(t, layer, catalog.Layer(*ref)) } type testLayerContent struct { @@ -212,22 +214,22 @@ func TestFileCatalog_GetByExtension(t *testing.T) { // we don't need the index itself, just the side effect on the file catalog after indexing _, err := file.NewTarIndex( fixtureTarFile.Name(), - layerTarIndexer(ft, &fileCatalog, &size, nil, nil), + layerTarIndexer(ft, fileCatalog, &size, nil, nil), ) require.NoError(t, err) tests := []struct { name string input string - want []FileCatalogEntry + want []filetree.IndexEntry wantErr require.ErrorAssertionFunc }{ { name: "get simple extension", input: ".txt", - want: []FileCatalogEntry{ + want: []filetree.IndexEntry{ { - File: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, + Reference: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-1.txt", TarHeaderName: "path/branch.d/one/file-1.txt", @@ -236,7 +238,8 @@ func TestFileCatalog_GetByExtension(t *testing.T) { }, }, { - File: file.Reference{RealPath: "/path/branch.d/two/file-2.txt"}, + + Reference: file.Reference{RealPath: "/path/branch.d/two/file-2.txt"}, Metadata: file.Metadata{ 
Path: "/path/branch.d/two/file-2.txt", TarHeaderName: "path/branch.d/two/file-2.txt", @@ -245,7 +248,7 @@ func TestFileCatalog_GetByExtension(t *testing.T) { }, }, { - File: file.Reference{RealPath: "/path/file-3.txt"}, + Reference: file.Reference{RealPath: "/path/file-3.txt"}, Metadata: file.Metadata{ Path: "/path/file-3.txt", TarHeaderName: "path/file-3.txt", @@ -258,9 +261,10 @@ func TestFileCatalog_GetByExtension(t *testing.T) { { name: "get mixed type extension", input: ".d", - want: []FileCatalogEntry{ + want: []filetree.IndexEntry{ { - File: file.Reference{RealPath: "/path/branch.d"}, + + Reference: file.Reference{RealPath: "/path/branch.d"}, Metadata: file.Metadata{ Path: "/path/branch.d", TarHeaderName: "path/branch.d/", @@ -269,7 +273,8 @@ func TestFileCatalog_GetByExtension(t *testing.T) { }, }, { - File: file.Reference{RealPath: "/path/branch.d/one/file-4.d"}, + + Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.d"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-4.d", TarHeaderName: "path/branch.d/one/file-4.d", @@ -277,8 +282,10 @@ func TestFileCatalog_GetByExtension(t *testing.T) { MIMEType: "text/plain", }, }, + { - File: file.Reference{RealPath: "/path/common/branch.d"}, + + Reference: file.Reference{RealPath: "/path/common/branch.d"}, Metadata: file.Metadata{ Path: "/path/common/branch.d", TarHeaderName: "path/common/branch.d", @@ -287,7 +294,8 @@ func TestFileCatalog_GetByExtension(t *testing.T) { }, }, { - File: file.Reference{RealPath: "/path/common/file-1.d"}, + + Reference: file.Reference{RealPath: "/path/common/file-1.d"}, Metadata: file.Metadata{ Path: "/path/common/file-1.d", TarHeaderName: "path/common/file-1.d", @@ -300,9 +308,9 @@ func TestFileCatalog_GetByExtension(t *testing.T) { { name: "get long extension", input: ".tar.gz", - want: []FileCatalogEntry{ + want: []filetree.IndexEntry{ { - File: file.Reference{RealPath: "/path/branch.d/one/.file-4.tar.gz"}, + Reference: file.Reference{RealPath: 
"/path/branch.d/one/.file-4.tar.gz"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/.file-4.tar.gz", TarHeaderName: "path/branch.d/one/.file-4.tar.gz", @@ -311,7 +319,7 @@ func TestFileCatalog_GetByExtension(t *testing.T) { }, }, { - File: file.Reference{RealPath: "/path/branch.d/one/file-4.tar.gz"}, + Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.tar.gz"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-4.tar.gz", TarHeaderName: "path/branch.d/one/file-4.tar.gz", @@ -324,9 +332,9 @@ func TestFileCatalog_GetByExtension(t *testing.T) { { name: "get short extension", input: ".gz", - want: []FileCatalogEntry{ + want: []filetree.IndexEntry{ { - File: file.Reference{RealPath: "/path/branch.d/one/.file-4.tar.gz"}, + Reference: file.Reference{RealPath: "/path/branch.d/one/.file-4.tar.gz"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/.file-4.tar.gz", TarHeaderName: "path/branch.d/one/.file-4.tar.gz", @@ -335,7 +343,7 @@ func TestFileCatalog_GetByExtension(t *testing.T) { }, }, { - File: file.Reference{RealPath: "/path/branch.d/one/file-4.tar.gz"}, + Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.tar.gz"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-4.tar.gz", TarHeaderName: "path/branch.d/one/file-4.tar.gz", @@ -348,7 +356,7 @@ func TestFileCatalog_GetByExtension(t *testing.T) { { name: "get non-existent extension", input: ".blerg-123", - want: []FileCatalogEntry{}, + want: []filetree.IndexEntry{}, }, } for _, tt := range tests { @@ -365,7 +373,7 @@ func TestFileCatalog_GetByExtension(t *testing.T) { cmpopts.EquateEmpty(), cmpopts.IgnoreUnexported(file.Reference{}), cmpopts.IgnoreFields(file.Metadata{}, "Mode", "GroupID", "UserID", "Size", "TarSequence"), - cmpopts.IgnoreFields(FileCatalogEntry{}, "Contents")); d != "" { + ); d != "" { t.Errorf("diff: %s", d) } }) @@ -382,22 +390,22 @@ func TestFileCatalog_GetByBasename(t *testing.T) { // we don't need the index itself, just the side effect on the file 
catalog after indexing _, err := file.NewTarIndex( fixtureTarFile.Name(), - layerTarIndexer(ft, &fileCatalog, &size, nil, nil), + layerTarIndexer(ft, fileCatalog, &size, nil, nil), ) require.NoError(t, err) tests := []struct { name string input string - want []FileCatalogEntry + want []filetree.IndexEntry wantErr require.ErrorAssertionFunc }{ { name: "get existing file name", input: "file-1.txt", - want: []FileCatalogEntry{ + want: []filetree.IndexEntry{ { - File: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, + Reference: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-1.txt", TarHeaderName: "path/branch.d/one/file-1.txt", @@ -410,14 +418,14 @@ func TestFileCatalog_GetByBasename(t *testing.T) { { name: "get non-existing name", input: "file-11.txt", - want: []FileCatalogEntry{}, + want: []filetree.IndexEntry{}, }, { name: "get directory name", input: "branch.d", - want: []FileCatalogEntry{ + want: []filetree.IndexEntry{ { - File: file.Reference{RealPath: "/path/branch.d"}, + Reference: file.Reference{RealPath: "/path/branch.d"}, Metadata: file.Metadata{ Path: "/path/branch.d", TarHeaderName: "path/branch.d/", @@ -426,7 +434,7 @@ func TestFileCatalog_GetByBasename(t *testing.T) { }, }, { - File: file.Reference{RealPath: "/path/common/branch.d"}, + Reference: file.Reference{RealPath: "/path/common/branch.d"}, Metadata: file.Metadata{ Path: "/path/common/branch.d", TarHeaderName: "path/common/branch.d", @@ -439,14 +447,15 @@ func TestFileCatalog_GetByBasename(t *testing.T) { { name: "get symlink name", input: "file-1.d", - want: []FileCatalogEntry{ + want: []filetree.IndexEntry{ { - File: file.Reference{RealPath: "/path/common/file-1.d"}, + Reference: file.Reference{RealPath: "/path/common/file-1.d"}, Metadata: file.Metadata{ Path: "/path/common/file-1.d", TarHeaderName: "path/common/file-1.d", Linkname: "path/branch.d/one/file-1.txt", TypeFlag: 50, // symlink + }, }, }, @@ -471,7 +480,7 @@ 
func TestFileCatalog_GetByBasename(t *testing.T) { cmpopts.EquateEmpty(), cmpopts.IgnoreUnexported(file.Reference{}), cmpopts.IgnoreFields(file.Metadata{}, "Mode", "GroupID", "UserID", "Size", "TarSequence"), - cmpopts.IgnoreFields(FileCatalogEntry{}, "Contents")); d != "" { + ); d != "" { t.Errorf("diff: %s", d) } }) @@ -488,22 +497,22 @@ func TestFileCatalog_GetByBasenameGlob(t *testing.T) { // we don't need the index itself, just the side effect on the file catalog after indexing _, err := file.NewTarIndex( fixtureTarFile.Name(), - layerTarIndexer(ft, &fileCatalog, &size, nil, nil), + layerTarIndexer(ft, fileCatalog, &size, nil, nil), ) require.NoError(t, err) tests := []struct { name string input string - want []FileCatalogEntry + want []filetree.IndexEntry wantErr require.ErrorAssertionFunc }{ { name: "get existing file name", input: "file-1.*", - want: []FileCatalogEntry{ + want: []filetree.IndexEntry{ { - File: file.Reference{RealPath: "/path/common/file-1.d"}, + Reference: file.Reference{RealPath: "/path/common/file-1.d"}, Metadata: file.Metadata{ Path: "/path/common/file-1.d", TarHeaderName: "path/common/file-1.d", @@ -512,7 +521,7 @@ func TestFileCatalog_GetByBasenameGlob(t *testing.T) { }, }, { - File: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, + Reference: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-1.txt", TarHeaderName: "path/branch.d/one/file-1.txt", @@ -525,14 +534,14 @@ func TestFileCatalog_GetByBasenameGlob(t *testing.T) { { name: "get non-existing name", input: "blerg-*.txt", - want: []FileCatalogEntry{}, + want: []filetree.IndexEntry{}, }, { name: "get directory name", input: "bran*.d", - want: []FileCatalogEntry{ + want: []filetree.IndexEntry{ { - File: file.Reference{RealPath: "/path/branch.d"}, + Reference: file.Reference{RealPath: "/path/branch.d"}, Metadata: file.Metadata{ Path: "/path/branch.d", TarHeaderName: "path/branch.d/", @@ -541,7 +550,7 @@ func 
TestFileCatalog_GetByBasenameGlob(t *testing.T) { }, }, { - File: file.Reference{RealPath: "/path/common/branch.d"}, + Reference: file.Reference{RealPath: "/path/common/branch.d"}, Metadata: file.Metadata{ Path: "/path/common/branch.d", TarHeaderName: "path/common/branch.d", @@ -554,9 +563,9 @@ func TestFileCatalog_GetByBasenameGlob(t *testing.T) { { name: "get symlink name", input: "file?1.d", - want: []FileCatalogEntry{ + want: []filetree.IndexEntry{ { - File: file.Reference{RealPath: "/path/common/file-1.d"}, + Reference: file.Reference{RealPath: "/path/common/file-1.d"}, Metadata: file.Metadata{ Path: "/path/common/file-1.d", TarHeaderName: "path/common/file-1.d", @@ -586,7 +595,7 @@ func TestFileCatalog_GetByBasenameGlob(t *testing.T) { cmpopts.EquateEmpty(), cmpopts.IgnoreUnexported(file.Reference{}), cmpopts.IgnoreFields(file.Metadata{}, "Mode", "GroupID", "UserID", "Size", "TarSequence"), - cmpopts.IgnoreFields(FileCatalogEntry{}, "Contents")); d != "" { + ); d != "" { t.Errorf("diff: %s", d) } }) @@ -603,22 +612,22 @@ func TestFileCatalog_GetByMimeType(t *testing.T) { // we don't need the index itself, just the side effect on the file catalog after indexing _, err := file.NewTarIndex( fixtureTarFile.Name(), - layerTarIndexer(ft, &fileCatalog, &size, nil, nil), + layerTarIndexer(ft, fileCatalog, &size, nil, nil), ) require.NoError(t, err) tests := []struct { name string input string - want []FileCatalogEntry + want []filetree.IndexEntry wantErr require.ErrorAssertionFunc }{ { name: "get existing file mimetype", input: "text/plain", - want: []FileCatalogEntry{ + want: []filetree.IndexEntry{ { - File: file.Reference{RealPath: "/path/branch.d/one/.file-4.tar.gz"}, + Reference: file.Reference{RealPath: "/path/branch.d/one/.file-4.tar.gz"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/.file-4.tar.gz", TarHeaderName: "path/branch.d/one/.file-4.tar.gz", @@ -627,7 +636,7 @@ func TestFileCatalog_GetByMimeType(t *testing.T) { }, }, { - File: 
file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, + Reference: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-1.txt", TarHeaderName: "path/branch.d/one/file-1.txt", @@ -636,7 +645,7 @@ func TestFileCatalog_GetByMimeType(t *testing.T) { }, }, { - File: file.Reference{RealPath: "/path/branch.d/one/file-4.d"}, + Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.d"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-4.d", TarHeaderName: "path/branch.d/one/file-4.d", @@ -645,7 +654,7 @@ func TestFileCatalog_GetByMimeType(t *testing.T) { }, }, { - File: file.Reference{RealPath: "/path/branch.d/one/file-4.tar.gz"}, + Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.tar.gz"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-4.tar.gz", TarHeaderName: "path/branch.d/one/file-4.tar.gz", @@ -654,7 +663,7 @@ func TestFileCatalog_GetByMimeType(t *testing.T) { }, }, { - File: file.Reference{RealPath: "/path/branch.d/two/file-2.txt"}, + Reference: file.Reference{RealPath: "/path/branch.d/two/file-2.txt"}, Metadata: file.Metadata{ Path: "/path/branch.d/two/file-2.txt", TarHeaderName: "path/branch.d/two/file-2.txt", @@ -663,7 +672,7 @@ func TestFileCatalog_GetByMimeType(t *testing.T) { }, }, { - File: file.Reference{RealPath: "/path/file-3.txt"}, + Reference: file.Reference{RealPath: "/path/file-3.txt"}, Metadata: file.Metadata{ Path: "/path/file-3.txt", TarHeaderName: "path/file-3.txt", @@ -676,7 +685,7 @@ func TestFileCatalog_GetByMimeType(t *testing.T) { { name: "get non-existing mimetype", input: "text/bogus", - want: []FileCatalogEntry{}, + want: []filetree.IndexEntry{}, }, } for _, tt := range tests { @@ -693,7 +702,7 @@ func TestFileCatalog_GetByMimeType(t *testing.T) { cmpopts.EquateEmpty(), cmpopts.IgnoreUnexported(file.Reference{}), cmpopts.IgnoreFields(file.Metadata{}, "Mode", "GroupID", "UserID", "Size", "TarSequence"), - 
cmpopts.IgnoreFields(FileCatalogEntry{}, "Contents")); d != "" { + ); d != "" { t.Errorf("diff: %s", d) } }) @@ -710,7 +719,7 @@ func TestFileCatalog_GetBasenames(t *testing.T) { // we don't need the index itself, just the side effect on the file catalog after indexing _, err := file.NewTarIndex( fixtureTarFile.Name(), - layerTarIndexer(ft, &fileCatalog, &size, nil, nil), + layerTarIndexer(ft, fileCatalog, &size, nil, nil), ) require.NoError(t, err) @@ -821,3 +830,27 @@ func fileExists(t *testing.T, filename string) bool { } return !info.IsDir() } + +func fileExtensions(p string) []string { + var exts []string + p = strings.TrimSpace(p) + + // ignore oddities + if strings.HasSuffix(p, ".") { + return exts + } + + // ignore directories + if strings.HasSuffix(p, "/") { + return exts + } + + // ignore . which indicate a hidden file + p = strings.TrimLeft(path.Base(p), ".") + for i := len(p) - 1; i >= 0; i-- { + if p[i] == '.' { + exts = append(exts, p[i:]) + } + } + return exts +} diff --git a/pkg/image/image.go b/pkg/image/image.go index 4711c4a9..34a28658 100644 --- a/pkg/image/image.go +++ b/pkg/image/image.go @@ -30,7 +30,7 @@ type Image struct { // Layers contains the rich layer objects in build order Layers []*Layer // FileCatalog contains all file metadata for all files in all layers - FileCatalog FileCatalog + FileCatalog *FileCatalog overrideMetadata []AdditionalMetadata } @@ -127,7 +127,7 @@ func WithOS(o string) AdditionalMetadata { } } -// NewImage provides a new, unread image object. +// NewImage provides a new (unread) image object. 
func NewImage(image v1.Image, contentCacheDir string, additionalMetadata ...AdditionalMetadata) *Image { imgObj := &Image{ image: image, @@ -201,7 +201,7 @@ func (i *Image) Read() error { for idx, v1Layer := range v1Layers { layer := NewLayer(v1Layer) - err := layer.Read(&i.FileCatalog, i.Metadata, idx, i.contentCacheDir) + err := layer.Read(i.FileCatalog, i.Metadata, idx, i.contentCacheDir) if err != nil { return err } @@ -264,37 +264,29 @@ func (i *Image) SquashedTree() *filetree.FileTree { // FileContentsFromSquash fetches file contents for a single path, relative to the image squash tree. // If the path does not exist an error is returned. func (i *Image) FileContentsFromSquash(path file.Path) (io.ReadCloser, error) { - return fetchFileContentsByPath(i.SquashedTree(), &i.FileCatalog, path) + return fetchFileContentsByPath(i.SquashedTree(), i.FileCatalog, path) +} + +func (i *Image) SquashedSearchContext() filetree.Searcher { + return filetree.NewSearchContext(i.SquashedTree(), i.FileCatalog.Index) } // FilesByMIMETypeFromSquash returns file references for files that match at least one of the given MIME types. -func (i *Image) FilesByMIMETypeFromSquash(mimeTypes ...string) ([]file.ReferenceAccessVia, error) { - var refs []file.ReferenceAccessVia - for _, ty := range mimeTypes { - refsForType, err := fetchFilesByMIMEType(i.SquashedTree(), &i.FileCatalog, ty) - if err != nil { - return nil, err +// Deprecated: please use SquashedSearchContext().SearchByMIMEType() instead. +func (i *Image) FilesByMIMETypeFromSquash(mimeTypes ...string) ([]file.Reference, error) { + var refs []file.Reference + refVias, err := i.SquashedSearchContext().SearchByMIMEType(mimeTypes...) + if err != nil { + return nil, err + } + for _, refVia := range refVias { + if refVia.HasReference() { + refs = append(refs, *refVia.Reference) } - refs = append(refs, refsForType...) 
} return refs, nil } -// FilesByExtensionFromSquash returns file references for files that have the given extension relative to the squash tree. -func (i *Image) FilesByExtensionFromSquash(extension string) ([]file.ReferenceAccessVia, error) { - return fetchFilesByExtension(i.SquashedTree(), &i.FileCatalog, extension) -} - -// FilesByBasenameFromSquash returns file references for files with the given basename relative to the squash tree. -func (i *Image) FilesByBasenameFromSquash(basename string) ([]file.ReferenceAccessVia, error) { - return fetchFilesByBasename(i.SquashedTree(), &i.FileCatalog, basename) -} - -// FilesByBasenameGlobFromSquash returns file references for files with the given basename glob pattern relative to the squash tree. -func (i *Image) FilesByBasenameGlobFromSquash(globs ...string) ([]file.ReferenceAccessVia, error) { - return fetchFilesByBasenameGlob(i.SquashedTree(), &i.FileCatalog, globs...) -} - // FileContentsByRef fetches file contents for a single file reference, regardless of the source layer. // If the path does not exist an error is returned. 
func (i *Image) FileContentsByRef(ref file.Reference) (io.ReadCloser, error) { diff --git a/pkg/image/image_test.go b/pkg/image/image_test.go index cd38df1b..1030b48d 100644 --- a/pkg/image/image_test.go +++ b/pkg/image/image_test.go @@ -3,10 +3,11 @@ package image import ( "crypto/sha256" "fmt" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" "os" "testing" - "github.com/go-test/deep" "github.com/google/go-containerregistry/pkg/name" ) @@ -103,7 +104,11 @@ func TestImageAdditionalMetadata(t *testing.T) { if err != nil { t.Fatalf("could not create image: %+v", err) } - for _, d := range deep.Equal(img, &test.image) { + if d := cmp.Diff(img, &test.image, + cmpopts.IgnoreFields(Image{}, "FileCatalog"), + cmpopts.IgnoreUnexported(Image{}), + cmp.AllowUnexported(name.Tag{}, name.Repository{}, name.Registry{}), + ); d != "" { t.Errorf("diff: %+v", d) } }) diff --git a/pkg/image/layer.go b/pkg/image/layer.go index 5d7c6789..c53fa897 100644 --- a/pkg/image/layer.go +++ b/pkg/image/layer.go @@ -155,59 +155,43 @@ func (l *Layer) FileContentsFromSquash(path file.Path) (io.ReadCloser, error) { } // FilesByMIMEType returns file references for files that match at least one of the given MIME types relative to each layer tree. -func (l *Layer) FilesByMIMEType(mimeTypes ...string) ([]file.ReferenceAccessVia, error) { - var refs []file.ReferenceAccessVia - for _, ty := range mimeTypes { - refsForType, err := fetchFilesByMIMEType(l.Tree, l.fileCatalog, ty) - if err != nil { - return nil, err +// Deprecated: use SearchContext().SearchByMIMEType() instead. +func (l *Layer) FilesByMIMEType(mimeTypes ...string) ([]file.Reference, error) { + var refs []file.Reference + refVias, err := l.SearchContext().SearchByMIMEType(mimeTypes...) + if err != nil { + return nil, err + } + for _, refVia := range refVias { + if refVia.HasReference() { + refs = append(refs, *refVia.Reference) } - refs = append(refs, refsForType...) 
} return refs, nil } // FilesByMIMETypeFromSquash returns file references for files that match at least one of the given MIME types relative to the squashed file tree representation. -func (l *Layer) FilesByMIMETypeFromSquash(mimeTypes ...string) ([]file.ReferenceAccessVia, error) { - var refs []file.ReferenceAccessVia - for _, ty := range mimeTypes { - refsForType, err := fetchFilesByMIMEType(l.SquashedTree, l.fileCatalog, ty) - if err != nil { - return nil, err +// Deprecated: use SquashedSearchContext().SearchByMIMEType() instead. +func (l *Layer) FilesByMIMETypeFromSquash(mimeTypes ...string) ([]file.Reference, error) { + var refs []file.Reference + refVias, err := l.SquashedSearchContext().SearchByMIMEType(mimeTypes...) + if err != nil { + return nil, err + } + for _, refVia := range refVias { + if refVia.HasReference() { + refs = append(refs, *refVia.Reference) } - refs = append(refs, refsForType...) } return refs, nil } -// FilesByExtension returns file references for files that have the given extension. -func (l *Layer) FilesByExtension(extension string) ([]file.ReferenceAccessVia, error) { - return fetchFilesByExtension(l.Tree, l.fileCatalog, extension) -} - -// FilesByExtensionFromSquash returns file references for files have the given extension relative to the squash tree. -func (l *Layer) FilesByExtensionFromSquash(extension string) ([]file.ReferenceAccessVia, error) { - return fetchFilesByExtension(l.SquashedTree, l.fileCatalog, extension) -} - -// FilesByBasename returns file references for files that have the following basename. -func (l *Layer) FilesByBasename(basename string) ([]file.ReferenceAccessVia, error) { - return fetchFilesByBasename(l.Tree, l.fileCatalog, basename) -} - -// FilesByBasenameFromSquash returns file references for files by name relative to the squash tree. 
-func (l *Layer) FilesByBasenameFromSquash(extension string) ([]file.ReferenceAccessVia, error) { - return fetchFilesByBasename(l.SquashedTree, l.fileCatalog, extension) -} - -// FilesByBasenameGlob returns file references for files that have the following basename glob. -func (l *Layer) FilesByBasenameGlob(glob string) ([]file.ReferenceAccessVia, error) { - return fetchFilesByBasenameGlob(l.Tree, l.fileCatalog, glob) +func (l *Layer) SearchContext() filetree.Searcher { + return filetree.NewSearchContext(l.Tree, l.fileCatalog.Index) } -// FilesByBasenameGlobFromSquash returns file references for files by basename glob pattern relative to the squash tree. -func (l *Layer) FilesByBasenameGlobFromSquash(glob string) ([]file.ReferenceAccessVia, error) { - return fetchFilesByBasenameGlob(l.SquashedTree, l.fileCatalog, glob) +func (l *Layer) SquashedSearchContext() filetree.Searcher { + return filetree.NewSearchContext(l.SquashedTree, l.fileCatalog.Index) } func layerTarIndexer(ft *filetree.FileTree, fileCatalog *FileCatalog, size *int64, layerRef *Layer, monitor *progress.Manual) file.TarIndexVisitor { diff --git a/test/integration/mime_type_detection_test.go b/test/integration/mime_type_detection_test.go index f905ad34..abfee547 100644 --- a/test/integration/mime_type_detection_test.go +++ b/test/integration/mime_type_detection_test.go @@ -22,7 +22,7 @@ func TestContentMIMETypeDetection(t *testing.T) { } for mimeType, paths := range pathsByMIMEType { - refs, err := img.FilesByMIMETypeFromSquash(mimeType) + refs, err := img.SquashedSearchContext().SearchByMIMEType(mimeType) assert.NoError(t, err) assert.NotZero(t, len(refs), "found no refs for type=%q", mimeType) for _, ref := range refs { From 58b595c47de1aab9ea5c8190d794503ac80e4b64 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Mon, 30 Jan 2023 15:17:26 -0500 Subject: [PATCH 05/35] add tests for new search context object Signed-off-by: Alex Goodman --- pkg/filetree/search_test.go | 253 
++++++++++++++++++++++++++++++++++++ 1 file changed, 253 insertions(+) create mode 100644 pkg/filetree/search_test.go diff --git a/pkg/filetree/search_test.go b/pkg/filetree/search_test.go new file mode 100644 index 00000000..766abfcd --- /dev/null +++ b/pkg/filetree/search_test.go @@ -0,0 +1,253 @@ +package filetree + +import ( + "fmt" + "github.com/anchore/stereoscope/pkg/file" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/stretchr/testify/require" + "testing" +) + +func Test_searchContext_SearchByPath(t *testing.T) { + type fields struct { + tree *FileTree + index Index + } + type args struct { + path string + options []LinkResolutionOption + } + + tree := NewFileTree() + ref, err := tree.AddFile("/path/to/file.txt") + require.NoError(t, err) + require.NotNil(t, ref) + + idx := NewIndex() + idx.Add(*ref, file.Metadata{MIMEType: "plain/text"}) + + defaultFields := fields{ + tree: tree, + index: idx, + } + + tests := []struct { + name string + fields fields + args args + want *file.ReferenceAccessVia + wantErr require.ErrorAssertionFunc + }{ + { + name: "path exists", + fields: defaultFields, + args: args{ + path: "/path/to/file.txt", + }, + want: &file.ReferenceAccessVia{ + ReferenceAccess: file.ReferenceAccess{ + RequestPath: "/path/to/file.txt", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", + }, + }, + }, + }, + { + name: "path does not exists", + fields: defaultFields, + args: args{ + path: "/NOT/path/to/file.txt", + }, + want: nil, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = require.NoError + } + i := searchContext{ + tree: tt.fields.tree, + index: tt.fields.index, + } + got, err := i.SearchByPath(tt.args.path, tt.args.options...) 
+ tt.wantErr(t, err, fmt.Sprintf("SearchByPath(%v, %v)", tt.args.path, tt.args.options)) + if err != nil { + return + } + + opts := []cmp.Option{ + cmpopts.IgnoreFields(file.Reference{}, "id"), + } + + if d := cmp.Diff(tt.want, got, opts...); d != "" { + t.Errorf("SearchByPath() mismatch (-want +got):\n%s", d) + } + }) + } +} + +func Test_searchContext_SearchByGlob(t *testing.T) { + type fields struct { + tree *FileTree + index Index + } + type args struct { + glob string + options []LinkResolutionOption + } + + tree := NewFileTree() + ref, err := tree.AddFile("/path/to/file.txt") + require.NoError(t, err) + require.NotNil(t, ref) + + idx := NewIndex() + idx.Add(*ref, file.Metadata{MIMEType: "plain/text"}) + + defaultFields := fields{ + tree: tree, + index: idx, + } + + tests := []struct { + name string + fields fields + args args + want []file.ReferenceAccessVia + wantErr require.ErrorAssertionFunc + }{ + { + name: "path exists", + fields: defaultFields, + args: args{ + glob: "/**/t?/fil?.txt", + }, + want: []file.ReferenceAccessVia{ + { + ReferenceAccess: file.ReferenceAccess{ + RequestPath: "/path/to/file.txt", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", + }, + }, + }, + }, + }, + { + name: "path does not exists", + fields: defaultFields, + args: args{ + glob: "/NOT/**/file", + }, + want: nil, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = require.NoError + } + i := searchContext{ + tree: tt.fields.tree, + index: tt.fields.index, + } + got, err := i.SearchByGlob(tt.args.glob, tt.args.options...) 
+ tt.wantErr(t, err, fmt.Sprintf("SearchByGlob(%v, %v)", tt.args.glob, tt.args.options)) + if err != nil { + return + } + + opts := []cmp.Option{ + cmpopts.IgnoreFields(file.Reference{}, "id"), + } + + if d := cmp.Diff(tt.want, got, opts...); d != "" { + t.Errorf("SearchByGlob() mismatch (-want +got):\n%s", d) + } + }) + } +} + +func Test_searchContext_SearchByMIMEType(t *testing.T) { + type fields struct { + tree *FileTree + index Index + } + type args struct { + mimeTypes string + } + + tree := NewFileTree() + ref, err := tree.AddFile("/path/to/file.txt") + require.NoError(t, err) + require.NotNil(t, ref) + + idx := NewIndex() + idx.Add(*ref, file.Metadata{MIMEType: "plain/text"}) + + defaultFields := fields{ + tree: tree, + index: idx, + } + + tests := []struct { + name string + fields fields + args args + want []file.ReferenceAccessVia + wantErr require.ErrorAssertionFunc + }{ + { + name: "types exists", + fields: defaultFields, + args: args{ + mimeTypes: "plain/text", + }, + want: []file.ReferenceAccessVia{ + { + ReferenceAccess: file.ReferenceAccess{ + RequestPath: "/path/to/file.txt", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", + }, + }, + }, + }, + }, + { + name: "types do not exists", + fields: defaultFields, + args: args{ + mimeTypes: "octetstream", + }, + want: nil, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = require.NoError + } + i := searchContext{ + tree: tt.fields.tree, + index: tt.fields.index, + } + got, err := i.SearchByMIMEType(tt.args.mimeTypes) + tt.wantErr(t, err, fmt.Sprintf("SearchByMIMEType(%v)", tt.args.mimeTypes)) + if err != nil { + return + } + + opts := []cmp.Option{ + cmpopts.IgnoreFields(file.Reference{}, "id"), + } + + if d := cmp.Diff(tt.want, got, opts...); d != "" { + t.Errorf("SearchByMIMEType() mismatch (-want +got):\n%s", d) + } + }) + } +} From 6b9223f1bc62bc6b41aae87e40386cc72eb422d4 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: 
Mon, 30 Jan 2023 15:17:45 -0500 Subject: [PATCH 06/35] remove unused tar header fields from file.Metadata struct Signed-off-by: Alex Goodman --- pkg/file/metadata.go | 26 ++-- pkg/file/metadata_test.go | 16 +-- pkg/file/tarutil.go | 2 +- pkg/file/tarutil_test.go | 40 +++---- pkg/filetree/filetree.go | 2 +- pkg/filetree/index.go | 6 +- pkg/image/file_catalog_test.go | 210 ++++++++++++++------------------- pkg/image/layer.go | 2 +- 8 files changed, 135 insertions(+), 169 deletions(-) diff --git a/pkg/file/metadata.go b/pkg/file/metadata.go index 1cbd0d92..16595286 100644 --- a/pkg/file/metadata.go +++ b/pkg/file/metadata.go @@ -14,10 +14,6 @@ import ( type Metadata struct { // Path is the absolute path representation to the file Path string - // TarHeaderName is the exact entry name as found within a tar header - TarHeaderName string - // TarSequence is the nth header in the tar file this entry was found - TarSequence int64 // Linkname is populated only for hardlinks / symlinks, can be an absolute or relative Linkname string // Size of the file in bytes @@ -31,19 +27,17 @@ type Metadata struct { MIMEType string } -func NewMetadata(header tar.Header, sequence int64, content io.Reader) Metadata { +func NewMetadata(header tar.Header, content io.Reader) Metadata { return Metadata{ - Path: path.Clean(DirSeparator + header.Name), - TarHeaderName: header.Name, - TarSequence: sequence, - TypeFlag: header.Typeflag, - Linkname: header.Linkname, - Size: header.FileInfo().Size(), - Mode: header.FileInfo().Mode(), - UserID: header.Uid, - GroupID: header.Gid, - IsDir: header.FileInfo().IsDir(), - MIMEType: MIMEType(content), + Path: path.Clean(DirSeparator + header.Name), + TypeFlag: header.Typeflag, + Linkname: header.Linkname, + Size: header.FileInfo().Size(), + Mode: header.FileInfo().Mode(), + UserID: header.Uid, + GroupID: header.Gid, + IsDir: header.FileInfo().IsDir(), + MIMEType: MIMEType(content), } } diff --git a/pkg/file/metadata_test.go b/pkg/file/metadata_test.go index 
b89ed431..16dd82c1 100644 --- a/pkg/file/metadata_test.go +++ b/pkg/file/metadata_test.go @@ -16,13 +16,13 @@ func TestFileMetadataFromTar(t *testing.T) { tarReader := getTarFixture(t, "fixture-1") expected := []Metadata{ - {Path: "/path", TarSequence: 0, TarHeaderName: "path/", TypeFlag: 53, Linkname: "", Size: 0, Mode: os.ModeDir | 0o755, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""}, - {Path: "/path/branch", TarSequence: 1, TarHeaderName: "path/branch/", TypeFlag: 53, Linkname: "", Size: 0, Mode: os.ModeDir | 0o755, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""}, - {Path: "/path/branch/one", TarSequence: 2, TarHeaderName: "path/branch/one/", TypeFlag: 53, Linkname: "", Size: 0, Mode: os.ModeDir | 0o700, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""}, - {Path: "/path/branch/one/file-1.txt", TarSequence: 3, TarHeaderName: "path/branch/one/file-1.txt", TypeFlag: 48, Linkname: "", Size: 11, Mode: 0o700, UserID: 1337, GroupID: 5432, IsDir: false, MIMEType: "text/plain"}, - {Path: "/path/branch/two", TarSequence: 4, TarHeaderName: "path/branch/two/", TypeFlag: 53, Linkname: "", Size: 0, Mode: os.ModeDir | 0o755, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""}, - {Path: "/path/branch/two/file-2.txt", TarSequence: 5, TarHeaderName: "path/branch/two/file-2.txt", TypeFlag: 48, Linkname: "", Size: 12, Mode: 0o755, UserID: 1337, GroupID: 5432, IsDir: false, MIMEType: "text/plain"}, - {Path: "/path/file-3.txt", TarSequence: 6, TarHeaderName: "path/file-3.txt", TypeFlag: 48, Linkname: "", Size: 11, Mode: 0o664, UserID: 1337, GroupID: 5432, IsDir: false, MIMEType: "text/plain"}, + {Path: "/path", TypeFlag: 53, Linkname: "", Size: 0, Mode: os.ModeDir | 0o755, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""}, + {Path: "/path/branch", TypeFlag: 53, Linkname: "", Size: 0, Mode: os.ModeDir | 0o755, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""}, + {Path: "/path/branch/one", TypeFlag: 53, Linkname: "", Size: 0, Mode: os.ModeDir | 
0o700, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""}, + {Path: "/path/branch/one/file-1.txt", TypeFlag: 48, Linkname: "", Size: 11, Mode: 0o700, UserID: 1337, GroupID: 5432, IsDir: false, MIMEType: "text/plain"}, + {Path: "/path/branch/two", TypeFlag: 53, Linkname: "", Size: 0, Mode: os.ModeDir | 0o755, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""}, + {Path: "/path/branch/two/file-2.txt", TypeFlag: 48, Linkname: "", Size: 12, Mode: 0o755, UserID: 1337, GroupID: 5432, IsDir: false, MIMEType: "text/plain"}, + {Path: "/path/file-3.txt", TypeFlag: 48, Linkname: "", Size: 11, Mode: 0o664, UserID: 1337, GroupID: 5432, IsDir: false, MIMEType: "text/plain"}, } var actual []Metadata @@ -31,7 +31,7 @@ func TestFileMetadataFromTar(t *testing.T) { if strings.HasSuffix(entry.Header.Name, ".txt") { contents = strings.NewReader("#!/usr/bin/env bash\necho 'awesome script'") } - actual = append(actual, NewMetadata(entry.Header, entry.Sequence, contents)) + actual = append(actual, NewMetadata(entry.Header, contents)) return nil } diff --git a/pkg/file/tarutil.go b/pkg/file/tarutil.go index b08416b2..4a01c09a 100644 --- a/pkg/file/tarutil.go +++ b/pkg/file/tarutil.go @@ -108,7 +108,7 @@ func MetadataFromTar(reader io.ReadCloser, tarPath string) (Metadata, error) { if entry.Header.Size > 0 { content = reader } - m := NewMetadata(entry.Header, entry.Sequence, content) + m := NewMetadata(entry.Header, content) metadata = &m return ErrTarStopIteration } diff --git a/pkg/file/tarutil_test.go b/pkg/file/tarutil_test.go index 4c93fa58..220f850e 100644 --- a/pkg/file/tarutil_test.go +++ b/pkg/file/tarutil_test.go @@ -62,34 +62,30 @@ func TestMetadataFromTar(t *testing.T) { name: "path/branch/two/file-2.txt", fixture: "fixture-1", expected: Metadata{ - Path: "/path/branch/two/file-2.txt", - TarHeaderName: "path/branch/two/file-2.txt", - TarSequence: 5, - Linkname: "", - Size: 12, - UserID: 1337, - GroupID: 5432, - TypeFlag: 0x30, - IsDir: false, - Mode: 0x1ed, - MIMEType: 
"application/octet-stream", + Path: "/path/branch/two/file-2.txt", + Linkname: "", + Size: 12, + UserID: 1337, + GroupID: 5432, + TypeFlag: 0x30, + IsDir: false, + Mode: 0x1ed, + MIMEType: "application/octet-stream", }, }, { name: "path/branch/two/", fixture: "fixture-1", expected: Metadata{ - Path: "/path/branch/two", - TarHeaderName: "path/branch/two/", - TarSequence: 4, - Linkname: "", - Size: 0, - UserID: 1337, - GroupID: 5432, - TypeFlag: 0x35, - IsDir: true, - Mode: 0x800001ed, - MIMEType: "", + Path: "/path/branch/two", + Linkname: "", + Size: 0, + UserID: 1337, + GroupID: 5432, + TypeFlag: 0x35, + IsDir: true, + Mode: 0x800001ed, + MIMEType: "", }, }, } diff --git a/pkg/filetree/filetree.go b/pkg/filetree/filetree.go index e85efc1a..91b8624c 100644 --- a/pkg/filetree/filetree.go +++ b/pkg/filetree/filetree.go @@ -433,7 +433,7 @@ func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, // FilesByGlob fetches zero to many file.References for the given glob pattern (considers symlinks). 
func (t *FileTree) FilesByGlob(query string, options ...LinkResolutionOption) ([]file.ReferenceAccessVia, error) { - results := make([]file.ReferenceAccessVia, 0) + var results []file.ReferenceAccessVia if len(query) == 0 { return nil, fmt.Errorf("no glob pattern given") diff --git a/pkg/filetree/index.go b/pkg/filetree/index.go index c32ab0d5..89e74ee7 100644 --- a/pkg/filetree/index.go +++ b/pkg/filetree/index.go @@ -4,6 +4,7 @@ import ( "fmt" "os" "path" + "sort" "strings" "sync" @@ -111,7 +112,10 @@ func (c *index) Basenames() []string { c.RLock() defer c.RUnlock() - return c.basenames.List() + bns := c.basenames.List() + sort.Strings(bns) + + return bns } func (c *index) GetByMIMEType(mType string) ([]IndexEntry, error) { diff --git a/pkg/image/file_catalog_test.go b/pkg/image/file_catalog_test.go index b32a142a..c64cfd07 100644 --- a/pkg/image/file_catalog_test.go +++ b/pkg/image/file_catalog_test.go @@ -39,15 +39,14 @@ func TestFileCatalog_Add(t *testing.T) { ref := file.NewFileReference("/somepath") metadata := file.Metadata{ - Path: "a", - TarHeaderName: "b", - Linkname: "c", - Size: 1, - UserID: 2, - GroupID: 3, - TypeFlag: 4, - IsDir: true, - Mode: 5, + Path: "a", + Linkname: "c", + Size: 1, + UserID: 2, + GroupID: 3, + TypeFlag: 4, + IsDir: true, + Mode: 5, } layer := &Layer{ @@ -119,8 +118,7 @@ func TestFileCatalog_FileContents(t *testing.T) { expected := "first file\n" metadata := file.Metadata{ - Path: p, - TarHeaderName: p, + Path: p, } tr, err := file.NewTarIndex(fixtureFile.Name(), nil) @@ -231,29 +229,26 @@ func TestFileCatalog_GetByExtension(t *testing.T) { { Reference: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, Metadata: file.Metadata{ - Path: "/path/branch.d/one/file-1.txt", - TarHeaderName: "path/branch.d/one/file-1.txt", - TypeFlag: 48, - MIMEType: "text/plain", + Path: "/path/branch.d/one/file-1.txt", + TypeFlag: 48, + MIMEType: "text/plain", }, }, { Reference: file.Reference{RealPath: "/path/branch.d/two/file-2.txt"}, 
Metadata: file.Metadata{ - Path: "/path/branch.d/two/file-2.txt", - TarHeaderName: "path/branch.d/two/file-2.txt", - TypeFlag: 48, - MIMEType: "text/plain", + Path: "/path/branch.d/two/file-2.txt", + TypeFlag: 48, + MIMEType: "text/plain", }, }, { Reference: file.Reference{RealPath: "/path/file-3.txt"}, Metadata: file.Metadata{ - Path: "/path/file-3.txt", - TarHeaderName: "path/file-3.txt", - TypeFlag: 48, - MIMEType: "text/plain", + Path: "/path/file-3.txt", + TypeFlag: 48, + MIMEType: "text/plain", }, }, }, @@ -266,20 +261,18 @@ func TestFileCatalog_GetByExtension(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d"}, Metadata: file.Metadata{ - Path: "/path/branch.d", - TarHeaderName: "path/branch.d/", - TypeFlag: 53, - IsDir: true, + Path: "/path/branch.d", + TypeFlag: 53, + IsDir: true, }, }, { Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.d"}, Metadata: file.Metadata{ - Path: "/path/branch.d/one/file-4.d", - TarHeaderName: "path/branch.d/one/file-4.d", - TypeFlag: 48, // regular file - MIMEType: "text/plain", + Path: "/path/branch.d/one/file-4.d", + TypeFlag: 48, // regular file + MIMEType: "text/plain", }, }, @@ -287,20 +280,18 @@ func TestFileCatalog_GetByExtension(t *testing.T) { Reference: file.Reference{RealPath: "/path/common/branch.d"}, Metadata: file.Metadata{ - Path: "/path/common/branch.d", - TarHeaderName: "path/common/branch.d", - Linkname: "path/branch.d", - TypeFlag: 50, // symlink + Path: "/path/common/branch.d", + Linkname: "path/branch.d", + TypeFlag: 50, // symlink }, }, { Reference: file.Reference{RealPath: "/path/common/file-1.d"}, Metadata: file.Metadata{ - Path: "/path/common/file-1.d", - TarHeaderName: "path/common/file-1.d", - Linkname: "path/branch.d/one/file-1.txt", - TypeFlag: 50, // symlink + Path: "/path/common/file-1.d", + Linkname: "path/branch.d/one/file-1.txt", + TypeFlag: 50, // symlink }, }, }, @@ -312,19 +303,17 @@ func TestFileCatalog_GetByExtension(t *testing.T) { { Reference: 
file.Reference{RealPath: "/path/branch.d/one/.file-4.tar.gz"}, Metadata: file.Metadata{ - Path: "/path/branch.d/one/.file-4.tar.gz", - TarHeaderName: "path/branch.d/one/.file-4.tar.gz", - TypeFlag: 48, - MIMEType: "text/plain", + Path: "/path/branch.d/one/.file-4.tar.gz", + TypeFlag: 48, + MIMEType: "text/plain", }, }, { Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.tar.gz"}, Metadata: file.Metadata{ - Path: "/path/branch.d/one/file-4.tar.gz", - TarHeaderName: "path/branch.d/one/file-4.tar.gz", - TypeFlag: 48, - MIMEType: "text/plain", + Path: "/path/branch.d/one/file-4.tar.gz", + TypeFlag: 48, + MIMEType: "text/plain", }, }, }, @@ -336,19 +325,17 @@ func TestFileCatalog_GetByExtension(t *testing.T) { { Reference: file.Reference{RealPath: "/path/branch.d/one/.file-4.tar.gz"}, Metadata: file.Metadata{ - Path: "/path/branch.d/one/.file-4.tar.gz", - TarHeaderName: "path/branch.d/one/.file-4.tar.gz", - TypeFlag: 48, - MIMEType: "text/plain", + Path: "/path/branch.d/one/.file-4.tar.gz", + TypeFlag: 48, + MIMEType: "text/plain", }, }, { Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.tar.gz"}, Metadata: file.Metadata{ - Path: "/path/branch.d/one/file-4.tar.gz", - TarHeaderName: "path/branch.d/one/file-4.tar.gz", - TypeFlag: 48, - MIMEType: "text/plain", + Path: "/path/branch.d/one/file-4.tar.gz", + TypeFlag: 48, + MIMEType: "text/plain", }, }, }, @@ -372,7 +359,7 @@ func TestFileCatalog_GetByExtension(t *testing.T) { if d := cmp.Diff(tt.want, actual, cmpopts.EquateEmpty(), cmpopts.IgnoreUnexported(file.Reference{}), - cmpopts.IgnoreFields(file.Metadata{}, "Mode", "GroupID", "UserID", "Size", "TarSequence"), + cmpopts.IgnoreFields(file.Metadata{}, "Mode", "GroupID", "UserID", "Size"), ); d != "" { t.Errorf("diff: %s", d) } @@ -407,10 +394,9 @@ func TestFileCatalog_GetByBasename(t *testing.T) { { Reference: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, Metadata: file.Metadata{ - Path: "/path/branch.d/one/file-1.txt", - 
TarHeaderName: "path/branch.d/one/file-1.txt", - TypeFlag: 48, - MIMEType: "text/plain", + Path: "/path/branch.d/one/file-1.txt", + TypeFlag: 48, + MIMEType: "text/plain", }, }, }, @@ -427,19 +413,17 @@ func TestFileCatalog_GetByBasename(t *testing.T) { { Reference: file.Reference{RealPath: "/path/branch.d"}, Metadata: file.Metadata{ - Path: "/path/branch.d", - TarHeaderName: "path/branch.d/", - TypeFlag: 53, - IsDir: true, + Path: "/path/branch.d", + TypeFlag: 53, + IsDir: true, }, }, { Reference: file.Reference{RealPath: "/path/common/branch.d"}, Metadata: file.Metadata{ - Path: "/path/common/branch.d", - TarHeaderName: "path/common/branch.d", - Linkname: "path/branch.d", - TypeFlag: 50, // symlink + Path: "/path/common/branch.d", + Linkname: "path/branch.d", + TypeFlag: 50, // symlink }, }, }, @@ -451,10 +435,9 @@ func TestFileCatalog_GetByBasename(t *testing.T) { { Reference: file.Reference{RealPath: "/path/common/file-1.d"}, Metadata: file.Metadata{ - Path: "/path/common/file-1.d", - TarHeaderName: "path/common/file-1.d", - Linkname: "path/branch.d/one/file-1.txt", - TypeFlag: 50, // symlink + Path: "/path/common/file-1.d", + Linkname: "path/branch.d/one/file-1.txt", + TypeFlag: 50, // symlink }, }, @@ -479,7 +462,7 @@ func TestFileCatalog_GetByBasename(t *testing.T) { if d := cmp.Diff(tt.want, actual, cmpopts.EquateEmpty(), cmpopts.IgnoreUnexported(file.Reference{}), - cmpopts.IgnoreFields(file.Metadata{}, "Mode", "GroupID", "UserID", "Size", "TarSequence"), + cmpopts.IgnoreFields(file.Metadata{}, "Mode", "GroupID", "UserID", "Size"), ); d != "" { t.Errorf("diff: %s", d) } @@ -514,19 +497,17 @@ func TestFileCatalog_GetByBasenameGlob(t *testing.T) { { Reference: file.Reference{RealPath: "/path/common/file-1.d"}, Metadata: file.Metadata{ - Path: "/path/common/file-1.d", - TarHeaderName: "path/common/file-1.d", - Linkname: "path/branch.d/one/file-1.txt", - TypeFlag: 50, + Path: "/path/common/file-1.d", + Linkname: "path/branch.d/one/file-1.txt", + TypeFlag: 50, 
}, }, { Reference: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, Metadata: file.Metadata{ - Path: "/path/branch.d/one/file-1.txt", - TarHeaderName: "path/branch.d/one/file-1.txt", - TypeFlag: 48, - MIMEType: "text/plain", + Path: "/path/branch.d/one/file-1.txt", + TypeFlag: 48, + MIMEType: "text/plain", }, }, }, @@ -543,19 +524,17 @@ func TestFileCatalog_GetByBasenameGlob(t *testing.T) { { Reference: file.Reference{RealPath: "/path/branch.d"}, Metadata: file.Metadata{ - Path: "/path/branch.d", - TarHeaderName: "path/branch.d/", - TypeFlag: 53, - IsDir: true, + Path: "/path/branch.d", + TypeFlag: 53, + IsDir: true, }, }, { Reference: file.Reference{RealPath: "/path/common/branch.d"}, Metadata: file.Metadata{ - Path: "/path/common/branch.d", - TarHeaderName: "path/common/branch.d", - Linkname: "path/branch.d", - TypeFlag: 50, // symlink + Path: "/path/common/branch.d", + Linkname: "path/branch.d", + TypeFlag: 50, // symlink }, }, }, @@ -567,10 +546,9 @@ func TestFileCatalog_GetByBasenameGlob(t *testing.T) { { Reference: file.Reference{RealPath: "/path/common/file-1.d"}, Metadata: file.Metadata{ - Path: "/path/common/file-1.d", - TarHeaderName: "path/common/file-1.d", - Linkname: "path/branch.d/one/file-1.txt", - TypeFlag: 50, // symlink + Path: "/path/common/file-1.d", + Linkname: "path/branch.d/one/file-1.txt", + TypeFlag: 50, // symlink }, }, }, @@ -594,7 +572,7 @@ func TestFileCatalog_GetByBasenameGlob(t *testing.T) { if d := cmp.Diff(tt.want, actual, cmpopts.EquateEmpty(), cmpopts.IgnoreUnexported(file.Reference{}), - cmpopts.IgnoreFields(file.Metadata{}, "Mode", "GroupID", "UserID", "Size", "TarSequence"), + cmpopts.IgnoreFields(file.Metadata{}, "Mode", "GroupID", "UserID", "Size"), ); d != "" { t.Errorf("diff: %s", d) } @@ -629,55 +607,49 @@ func TestFileCatalog_GetByMimeType(t *testing.T) { { Reference: file.Reference{RealPath: "/path/branch.d/one/.file-4.tar.gz"}, Metadata: file.Metadata{ - Path: "/path/branch.d/one/.file-4.tar.gz", - 
TarHeaderName: "path/branch.d/one/.file-4.tar.gz", - TypeFlag: 48, - MIMEType: "text/plain", + Path: "/path/branch.d/one/.file-4.tar.gz", + TypeFlag: 48, + MIMEType: "text/plain", }, }, { Reference: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, Metadata: file.Metadata{ - Path: "/path/branch.d/one/file-1.txt", - TarHeaderName: "path/branch.d/one/file-1.txt", - TypeFlag: 48, - MIMEType: "text/plain", + Path: "/path/branch.d/one/file-1.txt", + TypeFlag: 48, + MIMEType: "text/plain", }, }, { Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.d"}, Metadata: file.Metadata{ - Path: "/path/branch.d/one/file-4.d", - TarHeaderName: "path/branch.d/one/file-4.d", - TypeFlag: 48, - MIMEType: "text/plain", + Path: "/path/branch.d/one/file-4.d", + TypeFlag: 48, + MIMEType: "text/plain", }, }, { Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.tar.gz"}, Metadata: file.Metadata{ - Path: "/path/branch.d/one/file-4.tar.gz", - TarHeaderName: "path/branch.d/one/file-4.tar.gz", - TypeFlag: 48, - MIMEType: "text/plain", + Path: "/path/branch.d/one/file-4.tar.gz", + TypeFlag: 48, + MIMEType: "text/plain", }, }, { Reference: file.Reference{RealPath: "/path/branch.d/two/file-2.txt"}, Metadata: file.Metadata{ - Path: "/path/branch.d/two/file-2.txt", - TarHeaderName: "path/branch.d/two/file-2.txt", - TypeFlag: 48, - MIMEType: "text/plain", + Path: "/path/branch.d/two/file-2.txt", + TypeFlag: 48, + MIMEType: "text/plain", }, }, { Reference: file.Reference{RealPath: "/path/file-3.txt"}, Metadata: file.Metadata{ - Path: "/path/file-3.txt", - TarHeaderName: "path/file-3.txt", - TypeFlag: 48, - MIMEType: "text/plain", + Path: "/path/file-3.txt", + TypeFlag: 48, + MIMEType: "text/plain", }, }, }, @@ -701,7 +673,7 @@ func TestFileCatalog_GetByMimeType(t *testing.T) { if d := cmp.Diff(tt.want, actual, cmpopts.EquateEmpty(), cmpopts.IgnoreUnexported(file.Reference{}), - cmpopts.IgnoreFields(file.Metadata{}, "Mode", "GroupID", "UserID", "Size", "TarSequence"), + 
cmpopts.IgnoreFields(file.Metadata{}, "Mode", "GroupID", "UserID", "Size"), ); d != "" { t.Errorf("diff: %s", d) } diff --git a/pkg/image/layer.go b/pkg/image/layer.go index c53fa897..4bc29fe6 100644 --- a/pkg/image/layer.go +++ b/pkg/image/layer.go @@ -205,7 +205,7 @@ func layerTarIndexer(ft *filetree.FileTree, fileCatalog *FileCatalog, size *int6 log.Warnf("unable to close file while indexing layer: %+v", err) } }() - metadata := file.NewMetadata(entry.Header, entry.Sequence, contents) + metadata := file.NewMetadata(entry.Header, contents) // note: the tar header name is independent of surrounding structure, for example, there may be a tar header entry // for /some/path/to/file.txt without any entries to constituent paths (/some, /some/path, /some/path/to ). From 47e967a30203235d05b71874d07e70c309ed86db Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Mon, 30 Jan 2023 16:34:29 -0500 Subject: [PATCH 07/35] use singular file type definitions Signed-off-by: Alex Goodman --- pkg/file/metadata.go | 61 ++++++--- pkg/file/metadata_test.go | 14 +-- pkg/file/tarutil_test.go | 36 +++--- pkg/file/type.go | 118 +++++++++++++++--- pkg/filetree/filetree.go | 12 +- pkg/filetree/glob_test.go | 8 +- pkg/filetree/search.go | 6 +- pkg/image/file_catalog_test.go | 109 ++++++++-------- pkg/image/layer.go | 19 ++- test/integration/fixture_image_simple_test.go | 18 ++- .../fixture_image_symlinks_test.go | 14 +-- 11 files changed, 263 insertions(+), 152 deletions(-) diff --git a/pkg/file/metadata.go b/pkg/file/metadata.go index 16595286..4eb93410 100644 --- a/pkg/file/metadata.go +++ b/pkg/file/metadata.go @@ -14,14 +14,14 @@ import ( type Metadata struct { // Path is the absolute path representation to the file Path string - // Linkname is populated only for hardlinks / symlinks, can be an absolute or relative - Linkname string + // LinkDestination is populated only for hardlinks / symlinks, can be an absolute or relative + LinkDestination string // Size of the file in bytes Size 
int64 UserID int GroupID int - // TypeFlag is the tar.TypeFlag entry for the file - TypeFlag byte + // Type is the tar.Type entry for the file + Type Type IsDir bool Mode os.FileMode MIMEType string @@ -29,15 +29,15 @@ type Metadata struct { func NewMetadata(header tar.Header, content io.Reader) Metadata { return Metadata{ - Path: path.Clean(DirSeparator + header.Name), - TypeFlag: header.Typeflag, - Linkname: header.Linkname, - Size: header.FileInfo().Size(), - Mode: header.FileInfo().Mode(), - UserID: header.Uid, - GroupID: header.Gid, - IsDir: header.FileInfo().IsDir(), - MIMEType: MIMEType(content), + Path: path.Clean(DirSeparator + header.Name), + Type: TypeFromTarType(header.Typeflag), + LinkDestination: header.Linkname, + Size: header.FileInfo().Size(), + Mode: header.FileInfo().Mode(), + UserID: header.Uid, + GroupID: header.Gid, + IsDir: header.FileInfo().IsDir(), + MIMEType: MIMEType(content), } } @@ -48,12 +48,37 @@ func NewMetadataFromSquashFSFile(path string, f *squashfs.File) (Metadata, error return Metadata{}, err } + var ty Type + switch { + case fi.IsDir(): + ty = TypeDir + case f.IsRegular(): + ty = TypeReg + case f.IsSymlink(): + ty = TypeSymlink + default: + switch fi.Mode() & os.ModeType { + case os.ModeNamedPipe: + ty = TypeFifo + case os.ModeSocket: + ty = TypeSocket + case os.ModeDevice: + ty = TypeBlockDevice + case os.ModeCharDevice: + ty = TypeCharacterDevice + case os.ModeIrregular: + ty = TypeIrregular + } + // note: cannot determine hardlink from squashfs.File (but case us not possible) + } + md := Metadata{ - Path: filepath.Clean(filepath.Join("/", path)), - Linkname: f.SymlinkPath(), - Size: fi.Size(), - IsDir: f.IsDir(), - Mode: fi.Mode(), + Path: filepath.Clean(filepath.Join("/", path)), + LinkDestination: f.SymlinkPath(), + Size: fi.Size(), + IsDir: f.IsDir(), + Mode: fi.Mode(), + Type: ty, } if f.IsRegular() { diff --git a/pkg/file/metadata_test.go b/pkg/file/metadata_test.go index 16dd82c1..cf3d1ee2 100644 --- 
a/pkg/file/metadata_test.go +++ b/pkg/file/metadata_test.go @@ -16,13 +16,13 @@ func TestFileMetadataFromTar(t *testing.T) { tarReader := getTarFixture(t, "fixture-1") expected := []Metadata{ - {Path: "/path", TypeFlag: 53, Linkname: "", Size: 0, Mode: os.ModeDir | 0o755, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""}, - {Path: "/path/branch", TypeFlag: 53, Linkname: "", Size: 0, Mode: os.ModeDir | 0o755, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""}, - {Path: "/path/branch/one", TypeFlag: 53, Linkname: "", Size: 0, Mode: os.ModeDir | 0o700, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""}, - {Path: "/path/branch/one/file-1.txt", TypeFlag: 48, Linkname: "", Size: 11, Mode: 0o700, UserID: 1337, GroupID: 5432, IsDir: false, MIMEType: "text/plain"}, - {Path: "/path/branch/two", TypeFlag: 53, Linkname: "", Size: 0, Mode: os.ModeDir | 0o755, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""}, - {Path: "/path/branch/two/file-2.txt", TypeFlag: 48, Linkname: "", Size: 12, Mode: 0o755, UserID: 1337, GroupID: 5432, IsDir: false, MIMEType: "text/plain"}, - {Path: "/path/file-3.txt", TypeFlag: 48, Linkname: "", Size: 11, Mode: 0o664, UserID: 1337, GroupID: 5432, IsDir: false, MIMEType: "text/plain"}, + {Path: "/path", Type: TypeDir, LinkDestination: "", Size: 0, Mode: os.ModeDir | 0o755, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""}, + {Path: "/path/branch", Type: TypeDir, LinkDestination: "", Size: 0, Mode: os.ModeDir | 0o755, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""}, + {Path: "/path/branch/one", Type: TypeDir, LinkDestination: "", Size: 0, Mode: os.ModeDir | 0o700, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""}, + {Path: "/path/branch/one/file-1.txt", Type: TypeReg, LinkDestination: "", Size: 11, Mode: 0o700, UserID: 1337, GroupID: 5432, IsDir: false, MIMEType: "text/plain"}, + {Path: "/path/branch/two", Type: TypeDir, LinkDestination: "", Size: 0, Mode: os.ModeDir | 0o755, UserID: 1337, GroupID: 5432, IsDir: 
true, MIMEType: ""}, + {Path: "/path/branch/two/file-2.txt", Type: TypeReg, LinkDestination: "", Size: 12, Mode: 0o755, UserID: 1337, GroupID: 5432, IsDir: false, MIMEType: "text/plain"}, + {Path: "/path/file-3.txt", Type: TypeReg, LinkDestination: "", Size: 11, Mode: 0o664, UserID: 1337, GroupID: 5432, IsDir: false, MIMEType: "text/plain"}, } var actual []Metadata diff --git a/pkg/file/tarutil_test.go b/pkg/file/tarutil_test.go index 220f850e..5d8fcb18 100644 --- a/pkg/file/tarutil_test.go +++ b/pkg/file/tarutil_test.go @@ -62,30 +62,30 @@ func TestMetadataFromTar(t *testing.T) { name: "path/branch/two/file-2.txt", fixture: "fixture-1", expected: Metadata{ - Path: "/path/branch/two/file-2.txt", - Linkname: "", - Size: 12, - UserID: 1337, - GroupID: 5432, - TypeFlag: 0x30, - IsDir: false, - Mode: 0x1ed, - MIMEType: "application/octet-stream", + Path: "/path/branch/two/file-2.txt", + LinkDestination: "", + Size: 12, + UserID: 1337, + GroupID: 5432, + Type: TypeReg, + IsDir: false, + Mode: 0x1ed, + MIMEType: "application/octet-stream", }, }, { name: "path/branch/two/", fixture: "fixture-1", expected: Metadata{ - Path: "/path/branch/two", - Linkname: "", - Size: 0, - UserID: 1337, - GroupID: 5432, - TypeFlag: 0x35, - IsDir: true, - Mode: 0x800001ed, - MIMEType: "", + Path: "/path/branch/two", + LinkDestination: "", + Size: 0, + UserID: 1337, + GroupID: 5432, + Type: TypeDir, + IsDir: true, + Mode: 0x800001ed, + MIMEType: "", }, }, } diff --git a/pkg/file/type.go b/pkg/file/type.go index c2f9db47..4c6f6775 100644 --- a/pkg/file/type.go +++ b/pkg/file/type.go @@ -1,25 +1,109 @@ package file -import "archive/tar" +import ( + "archive/tar" + "os" +) const ( - TypeReg Type = tar.TypeReg - TypeDir Type = tar.TypeDir - TypeSymlink Type = tar.TypeSymlink - TypeHardLink Type = tar.TypeLink - TypeCharacterDevice Type = tar.TypeChar - TypeBlockDevice Type = tar.TypeBlock - TypeFifo Type = tar.TypeFifo + TypeReg Type = 'r' + TypeHardLink Type = 'h' + TypeSymlink Type = 'l' + 
TypeCharacterDevice Type = 'c' + TypeBlockDevice Type = 'b' + TypeDir Type = 'd' + TypeFifo Type = 'f' + TypeSocket Type = 's' + TypeIrregular Type = '?' ) -var AllTypes = []Type{ - TypeReg, - TypeDir, - TypeSymlink, - TypeHardLink, - TypeCharacterDevice, - TypeBlockDevice, - TypeFifo, -} +// why use a rune type? we're looking for something that is memory compact but is easily human interpretable. type Type rune + +func AllTypes() []Type { + return []Type{ + TypeReg, + TypeHardLink, + TypeSymlink, + TypeCharacterDevice, + TypeBlockDevice, + TypeDir, + TypeFifo, + TypeSocket, + TypeIrregular, + } +} + +func TypeFromTarType(ty byte) Type { + switch ty { + case tar.TypeReg, tar.TypeRegA: + return TypeReg + case tar.TypeLink: + return TypeHardLink + case tar.TypeSymlink: + return TypeSymlink + case tar.TypeChar: + return TypeCharacterDevice + case tar.TypeBlock: + return TypeBlockDevice + case tar.TypeDir: + return TypeDir + case tar.TypeFifo: + return TypeFifo + default: + return TypeIrregular + } +} + +func TypeFromMode(mode os.FileMode) Type { + switch { + case isSet(mode, os.ModeSymlink): + return TypeSymlink + case isSet(mode, os.ModeIrregular): + return TypeIrregular + case isSet(mode, os.ModeCharDevice): + return TypeCharacterDevice + case isSet(mode, os.ModeDevice): + return TypeBlockDevice + case isSet(mode, os.ModeNamedPipe): + return TypeFifo + case isSet(mode, os.ModeSocket): + return TypeSocket + case mode.IsDir(): + return TypeDir + case mode.IsRegular(): + return TypeReg + default: + return TypeIrregular + } +} + +func isSet(mode, field os.FileMode) bool { + return mode&field != 0 +} + +func (t Type) String() string { + switch t { + case TypeReg: + return "RegularFile" + case TypeHardLink: + return "HardLink" + case TypeSymlink: + return "SymbolicLink" + case TypeCharacterDevice: + return "CharacterDevice" + case TypeBlockDevice: + return "BlockDevice" + case TypeDir: + return "Directory" + case TypeFifo: + return "FIFONode" + case TypeSocket: + return 
"Socket" + case TypeIrregular: + return "IrregularFile" + default: + return "Unknown" + } +} diff --git a/pkg/filetree/filetree.go b/pkg/filetree/filetree.go index 91b8624c..2dfb9e6e 100644 --- a/pkg/filetree/filetree.go +++ b/pkg/filetree/filetree.go @@ -19,15 +19,13 @@ var ErrRemovingRoot = errors.New("cannot remove the root path (`/`) from the Fil var ErrLinkCycleDetected = errors.New("cycle during symlink resolution") type Reader interface { - AllRealPaths() []file.Path - AllFiles(types ...file.Type) []file.Reference - ListPaths(dir file.Path) ([]file.Path, error) File(path file.Path, options ...LinkResolutionOption) (bool, *file.ReferenceAccessVia, error) - Reader() tree.Reader - Equal(other *FileTree) bool - PathDiff(other *FileTree) (extra, missing []file.Path) + FilesByGlob(query string, options ...LinkResolutionOption) ([]file.ReferenceAccessVia, error) + // note: there are more reader-like functions, however, let's try to keep this interface small and simple for now +} + +type Walker interface { Walk(fn func(path file.Path, f filenode.FileNode) error, conditions *WalkConditions) error - HasPath(path file.Path, options ...LinkResolutionOption) bool } type Writer interface { diff --git a/pkg/filetree/glob_test.go b/pkg/filetree/glob_test.go index 6d63b1de..3f47837d 100644 --- a/pkg/filetree/glob_test.go +++ b/pkg/filetree/glob_test.go @@ -240,20 +240,20 @@ func TestOSAdapter_ReadDir(t *testing.T) { expected: []fileinfoAdapter{ { VirtualPath: "/home/thing.txt", - Node: filenode.FileNode{RealPath: "/home/thing.txt", FileType: 48}, + Node: filenode.FileNode{RealPath: "/home/thing.txt", FileType: file.TypeReg}, }, { VirtualPath: "/home/wagoodman", - Node: filenode.FileNode{RealPath: "/home/wagoodman", FileType: 53}, + Node: filenode.FileNode{RealPath: "/home/wagoodman", FileType: file.TypeDir}, }, { VirtualPath: "/home/thing", - Node: filenode.FileNode{RealPath: "/home/thing", FileType: 50, LinkPath: "./thing.txt"}, + Node: filenode.FileNode{RealPath: 
"/home/thing", FileType: file.TypeSymlink, LinkPath: "./thing.txt"}, }, { VirtualPath: "/home/place", - Node: filenode.FileNode{RealPath: "/home/place", FileType: 49, LinkPath: "/somewhere-else"}, + Node: filenode.FileNode{RealPath: "/home/place", FileType: file.TypeHardLink, LinkPath: "/somewhere-else"}, }, }, shouldErr: false, diff --git a/pkg/filetree/search.go b/pkg/filetree/search.go index 7089f6e7..38ec5de9 100644 --- a/pkg/filetree/search.go +++ b/pkg/filetree/search.go @@ -15,11 +15,11 @@ type Searcher interface { } type searchContext struct { - tree *FileTree // this is the tree which all index search results are filtered against - index Index // this index is relative to one or more trees, not just necessarily one + tree Reader // this is the tree which all index search results are filtered against + index Index // this index is relative to one or more trees, not just necessarily one } -func NewSearchContext(tree *FileTree, index Index) Searcher { +func NewSearchContext(tree Reader, index Index) Searcher { return &searchContext{ tree: tree, index: index, diff --git a/pkg/image/file_catalog_test.go b/pkg/image/file_catalog_test.go index c64cfd07..e6eddbf4 100644 --- a/pkg/image/file_catalog_test.go +++ b/pkg/image/file_catalog_test.go @@ -39,14 +39,14 @@ func TestFileCatalog_Add(t *testing.T) { ref := file.NewFileReference("/somepath") metadata := file.Metadata{ - Path: "a", - Linkname: "c", - Size: 1, - UserID: 2, - GroupID: 3, - TypeFlag: 4, - IsDir: true, - Mode: 5, + Path: "a", + LinkDestination: "c", + Size: 1, + UserID: 2, + GroupID: 3, + Type: 4, + IsDir: true, + Mode: 5, } layer := &Layer{ @@ -230,7 +230,7 @@ func TestFileCatalog_GetByExtension(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-1.txt", - TypeFlag: 48, + Type: file.TypeReg, MIMEType: "text/plain", }, }, @@ -239,7 +239,7 @@ func TestFileCatalog_GetByExtension(t *testing.T) { Reference: 
file.Reference{RealPath: "/path/branch.d/two/file-2.txt"}, Metadata: file.Metadata{ Path: "/path/branch.d/two/file-2.txt", - TypeFlag: 48, + Type: file.TypeReg, MIMEType: "text/plain", }, }, @@ -247,7 +247,7 @@ func TestFileCatalog_GetByExtension(t *testing.T) { Reference: file.Reference{RealPath: "/path/file-3.txt"}, Metadata: file.Metadata{ Path: "/path/file-3.txt", - TypeFlag: 48, + Type: file.TypeReg, MIMEType: "text/plain", }, }, @@ -261,9 +261,9 @@ func TestFileCatalog_GetByExtension(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d"}, Metadata: file.Metadata{ - Path: "/path/branch.d", - TypeFlag: 53, - IsDir: true, + Path: "/path/branch.d", + Type: file.TypeDir, + IsDir: true, }, }, { @@ -271,7 +271,7 @@ func TestFileCatalog_GetByExtension(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.d"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-4.d", - TypeFlag: 48, // regular file + Type: file.TypeReg, MIMEType: "text/plain", }, }, @@ -280,18 +280,18 @@ func TestFileCatalog_GetByExtension(t *testing.T) { Reference: file.Reference{RealPath: "/path/common/branch.d"}, Metadata: file.Metadata{ - Path: "/path/common/branch.d", - Linkname: "path/branch.d", - TypeFlag: 50, // symlink + Path: "/path/common/branch.d", + LinkDestination: "path/branch.d", + Type: file.TypeSymlink, }, }, { Reference: file.Reference{RealPath: "/path/common/file-1.d"}, Metadata: file.Metadata{ - Path: "/path/common/file-1.d", - Linkname: "path/branch.d/one/file-1.txt", - TypeFlag: 50, // symlink + Path: "/path/common/file-1.d", + LinkDestination: "path/branch.d/one/file-1.txt", + Type: file.TypeSymlink, }, }, }, @@ -304,7 +304,7 @@ func TestFileCatalog_GetByExtension(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/one/.file-4.tar.gz"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/.file-4.tar.gz", - TypeFlag: 48, + Type: file.TypeReg, MIMEType: "text/plain", }, }, @@ -312,7 +312,7 @@ func 
TestFileCatalog_GetByExtension(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.tar.gz"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-4.tar.gz", - TypeFlag: 48, + Type: file.TypeReg, MIMEType: "text/plain", }, }, @@ -326,7 +326,7 @@ func TestFileCatalog_GetByExtension(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/one/.file-4.tar.gz"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/.file-4.tar.gz", - TypeFlag: 48, + Type: file.TypeReg, MIMEType: "text/plain", }, }, @@ -334,7 +334,7 @@ func TestFileCatalog_GetByExtension(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.tar.gz"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-4.tar.gz", - TypeFlag: 48, + Type: file.TypeReg, MIMEType: "text/plain", }, }, @@ -395,7 +395,7 @@ func TestFileCatalog_GetByBasename(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-1.txt", - TypeFlag: 48, + Type: file.TypeReg, MIMEType: "text/plain", }, }, @@ -413,17 +413,17 @@ func TestFileCatalog_GetByBasename(t *testing.T) { { Reference: file.Reference{RealPath: "/path/branch.d"}, Metadata: file.Metadata{ - Path: "/path/branch.d", - TypeFlag: 53, - IsDir: true, + Path: "/path/branch.d", + Type: file.TypeDir, + IsDir: true, }, }, { Reference: file.Reference{RealPath: "/path/common/branch.d"}, Metadata: file.Metadata{ - Path: "/path/common/branch.d", - Linkname: "path/branch.d", - TypeFlag: 50, // symlink + Path: "/path/common/branch.d", + LinkDestination: "path/branch.d", + Type: file.TypeSymlink, }, }, }, @@ -435,10 +435,9 @@ func TestFileCatalog_GetByBasename(t *testing.T) { { Reference: file.Reference{RealPath: "/path/common/file-1.d"}, Metadata: file.Metadata{ - Path: "/path/common/file-1.d", - Linkname: "path/branch.d/one/file-1.txt", - TypeFlag: 50, // symlink - + Path: "/path/common/file-1.d", + LinkDestination: 
"path/branch.d/one/file-1.txt", + Type: file.TypeSymlink, }, }, }, @@ -497,16 +496,16 @@ func TestFileCatalog_GetByBasenameGlob(t *testing.T) { { Reference: file.Reference{RealPath: "/path/common/file-1.d"}, Metadata: file.Metadata{ - Path: "/path/common/file-1.d", - Linkname: "path/branch.d/one/file-1.txt", - TypeFlag: 50, + Path: "/path/common/file-1.d", + LinkDestination: "path/branch.d/one/file-1.txt", + Type: file.TypeSymlink, }, }, { Reference: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-1.txt", - TypeFlag: 48, + Type: file.TypeReg, MIMEType: "text/plain", }, }, @@ -524,17 +523,17 @@ func TestFileCatalog_GetByBasenameGlob(t *testing.T) { { Reference: file.Reference{RealPath: "/path/branch.d"}, Metadata: file.Metadata{ - Path: "/path/branch.d", - TypeFlag: 53, - IsDir: true, + Path: "/path/branch.d", + Type: file.TypeDir, + IsDir: true, }, }, { Reference: file.Reference{RealPath: "/path/common/branch.d"}, Metadata: file.Metadata{ - Path: "/path/common/branch.d", - Linkname: "path/branch.d", - TypeFlag: 50, // symlink + Path: "/path/common/branch.d", + LinkDestination: "path/branch.d", + Type: file.TypeSymlink, }, }, }, @@ -546,9 +545,9 @@ func TestFileCatalog_GetByBasenameGlob(t *testing.T) { { Reference: file.Reference{RealPath: "/path/common/file-1.d"}, Metadata: file.Metadata{ - Path: "/path/common/file-1.d", - Linkname: "path/branch.d/one/file-1.txt", - TypeFlag: 50, // symlink + Path: "/path/common/file-1.d", + LinkDestination: "path/branch.d/one/file-1.txt", + Type: file.TypeSymlink, }, }, }, @@ -608,7 +607,7 @@ func TestFileCatalog_GetByMimeType(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/one/.file-4.tar.gz"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/.file-4.tar.gz", - TypeFlag: 48, + Type: file.TypeReg, MIMEType: "text/plain", }, }, @@ -616,7 +615,7 @@ func TestFileCatalog_GetByMimeType(t *testing.T) { Reference: file.Reference{RealPath: 
"/path/branch.d/one/file-1.txt"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-1.txt", - TypeFlag: 48, + Type: file.TypeReg, MIMEType: "text/plain", }, }, @@ -624,7 +623,7 @@ func TestFileCatalog_GetByMimeType(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.d"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-4.d", - TypeFlag: 48, + Type: file.TypeReg, MIMEType: "text/plain", }, }, @@ -632,7 +631,7 @@ func TestFileCatalog_GetByMimeType(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.tar.gz"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-4.tar.gz", - TypeFlag: 48, + Type: file.TypeReg, MIMEType: "text/plain", }, }, @@ -640,7 +639,7 @@ func TestFileCatalog_GetByMimeType(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/two/file-2.txt"}, Metadata: file.Metadata{ Path: "/path/branch.d/two/file-2.txt", - TypeFlag: 48, + Type: file.TypeReg, MIMEType: "text/plain", }, }, @@ -648,7 +647,7 @@ func TestFileCatalog_GetByMimeType(t *testing.T) { Reference: file.Reference{RealPath: "/path/file-3.txt"}, Metadata: file.Metadata{ Path: "/path/file-3.txt", - TypeFlag: 48, + Type: file.TypeReg, MIMEType: "text/plain", }, }, diff --git a/pkg/image/layer.go b/pkg/image/layer.go index 4bc29fe6..63f2a5b0 100644 --- a/pkg/image/layer.go +++ b/pkg/image/layer.go @@ -1,7 +1,6 @@ package image import ( - "archive/tar" "bytes" "errors" "fmt" @@ -218,18 +217,18 @@ func layerTarIndexer(ft *filetree.FileTree, fileCatalog *FileCatalog, size *int6 // In summary: the set of all FileTrees can have NON-leaf nodes that don't exist in the FileCatalog, but // the FileCatalog should NEVER have entries that don't appear in one (or more) FileTree(s). 
var fileReference *file.Reference - switch metadata.TypeFlag { - case tar.TypeSymlink: - fileReference, err = ft.AddSymLink(file.Path(metadata.Path), file.Path(metadata.Linkname)) + switch metadata.Type { + case file.TypeSymlink: + fileReference, err = ft.AddSymLink(file.Path(metadata.Path), file.Path(metadata.LinkDestination)) if err != nil { return err } - case tar.TypeLink: - fileReference, err = ft.AddHardLink(file.Path(metadata.Path), file.Path(metadata.Linkname)) + case file.TypeHardLink: + fileReference, err = ft.AddHardLink(file.Path(metadata.Path), file.Path(metadata.LinkDestination)) if err != nil { return err } - case tar.TypeDir: + case file.TypeDir: fileReference, err = ft.AddDir(file.Path(metadata.Path)) if err != nil { return err @@ -241,7 +240,7 @@ func layerTarIndexer(ft *filetree.FileTree, fileCatalog *FileCatalog, size *int6 } } if fileReference == nil { - return fmt.Errorf("could not add path=%q link=%q during tar iteration", metadata.Path, metadata.Linkname) + return fmt.Errorf("could not add path=%q link=%q during tar iteration", metadata.Path, metadata.LinkDestination) } if size != nil { @@ -278,7 +277,7 @@ func (l *Layer) squashfsVisitor(monitor *progress.Manual) file.SquashFSVisitor { switch { case f.IsSymlink(): - fileReference, err = l.Tree.AddSymLink(file.Path(metadata.Path), file.Path(metadata.Linkname)) + fileReference, err = l.Tree.AddSymLink(file.Path(metadata.Path), file.Path(metadata.LinkDestination)) if err != nil { return err } @@ -295,7 +294,7 @@ func (l *Layer) squashfsVisitor(monitor *progress.Manual) file.SquashFSVisitor { } if fileReference == nil { - return fmt.Errorf("could not add path=%q link=%q during squashfs iteration", metadata.Path, metadata.Linkname) + return fmt.Errorf("could not add path=%q link=%q during squashfs iteration", metadata.Path, metadata.LinkDestination) } l.Metadata.Size += metadata.Size diff --git a/test/integration/fixture_image_simple_test.go b/test/integration/fixture_image_simple_test.go index 
24a44c9f..0b13a95f 100644 --- a/test/integration/fixture_image_simple_test.go +++ b/test/integration/fixture_image_simple_test.go @@ -177,11 +177,11 @@ func BenchmarkSimpleImage_FetchSquashedContents(b *testing.B) { b.Run(c.source, func(b *testing.B) { for i := 0; i < b.N; i++ { for _, ref := range paths { - f, err := img.FileCatalog.Get(ref) + f, err := img.FileCatalog.FileContents(ref) if err != nil { b.Fatalf("unable to read: %+v", err) } - _, err = io.ReadAll(f.Contents()) + _, err = io.ReadAll(f) } } }) @@ -190,7 +190,8 @@ func BenchmarkSimpleImage_FetchSquashedContents(b *testing.B) { func assertImageSimpleMetadata(t *testing.T, i *image.Image, expectedValues testCase) { t.Helper() - t.Log("Asserting metadata...") + //t.Log("Asserting metadata...") + if i.Metadata.MediaType != expectedValues.imageMediaType { t.Errorf("unexpected image media type: %+v", i.Metadata.MediaType) } @@ -226,7 +227,9 @@ func assertImageSimpleMetadata(t *testing.T, i *image.Image, expectedValues test } func assertImageSimpleSquashedTrees(t *testing.T, i *image.Image) { - t.Log("Asserting squashed trees...") + t.Helper() + //t.Log("Asserting squashed trees...") + one := filetree.NewFileTree() one.AddFile("/somefile-1.txt") @@ -263,7 +266,9 @@ func assertImageSimpleSquashedTrees(t *testing.T, i *image.Image) { } func assertImageSimpleTrees(t *testing.T, i *image.Image) { - t.Log("Asserting trees...") + t.Helper() + //t.Log("Asserting trees...") + one := filetree.NewFileTree() one.AddFile("/somefile-1.txt") @@ -290,7 +295,8 @@ func assertImageSimpleTrees(t *testing.T, i *image.Image) { } func assertImageSimpleContents(t *testing.T, i *image.Image) { - t.Log("Asserting contents...") + t.Helper() + //t.Log("Asserting contents...") expectedContents := map[string]string{ "/somefile-1.txt": "this file has contents", diff --git a/test/integration/fixture_image_symlinks_test.go b/test/integration/fixture_image_symlinks_test.go index a192396b..39ad6304 100644 --- 
a/test/integration/fixture_image_symlinks_test.go +++ b/test/integration/fixture_image_symlinks_test.go @@ -92,22 +92,22 @@ func assertMatch(t *testing.T, i *image.Image, cfg linkFetchConfig, expectedReso if actualResolve.ID() != expectedResolve.ID() { var exLayer = -1 var acLayer = -1 - var exType byte - var acType byte + var exType file.Type + var acType file.Type eM, err := i.FileCatalog.Get(*expectedResolve) if err == nil { - exLayer = int(eM.Layer.Metadata.Index) - exType = eM.Metadata.TypeFlag + exLayer = int(i.FileCatalog.Layer(*expectedResolve).Metadata.Index) + exType = eM.Metadata.Type } aM, err := i.FileCatalog.Get(*actualResolve) if err == nil { - acLayer = int(aM.Layer.Metadata.Index) - acType = aM.Metadata.TypeFlag + acLayer = int(i.FileCatalog.Layer(*actualResolve).Metadata.Index) + acType = aM.Metadata.Type } - t.Fatalf("mismatched link resolution link=%+v: '%+v (layer=%d type=%+v)'!='%+v (layer=%d type=%+v linkName=%s)'", cfg.linkPath, expectedResolve, exLayer, exType, actualResolve, acLayer, acType, aM.Metadata.Linkname) + t.Fatalf("mismatched link resolution link=%+v: <%+v (layer=%d type=%+v)> != <%+v (layer=%d type=%+v linkName=%s)>", cfg.linkPath, expectedResolve, exLayer, exType, actualResolve, acLayer, acType, aM.Metadata.LinkDestination) } } From 30c75673c529fe9c0ab8dbc32f003e5823fa58a1 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Mon, 30 Jan 2023 19:22:12 -0500 Subject: [PATCH 08/35] add logging for filetree searches Signed-off-by: Alex Goodman --- pkg/filetree/search.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pkg/filetree/search.go b/pkg/filetree/search.go index 38ec5de9..effb9c43 100644 --- a/pkg/filetree/search.go +++ b/pkg/filetree/search.go @@ -3,6 +3,8 @@ package filetree import ( "fmt" + "github.com/anchore/stereoscope/internal/log" + "github.com/anchore/stereoscope/pkg/file" "github.com/bmatcuk/doublestar/v4" ) @@ -28,12 +30,16 @@ func NewSearchContext(tree Reader, index Index) Searcher { func (i 
searchContext) SearchByPath(path string, options ...LinkResolutionOption) (*file.ReferenceAccessVia, error) { // TODO: one day this could leverage indexes outside of the tree, but today this is not implemented + log.WithFields("path", path).Trace("searching filetree by path") + options = append(options, FollowBasenameLinks) _, ref, err := i.tree.File(file.Path(path), options...) return ref, err } func (i searchContext) SearchByMIMEType(mimeTypes ...string) ([]file.ReferenceAccessVia, error) { + log.WithFields("types", mimeTypes).Trace("searching filetree by MIME types") + var fileEntries []IndexEntry for _, mType := range mimeTypes { @@ -48,6 +54,8 @@ func (i searchContext) SearchByMIMEType(mimeTypes ...string) ([]file.ReferenceAc } func (i searchContext) SearchByGlob(pattern string, options ...LinkResolutionOption) ([]file.ReferenceAccessVia, error) { + log.WithFields("glob", pattern).Trace("searching filetree by glob") + if i.index == nil { options = append(options, FollowBasenameLinks) return i.tree.FilesByGlob(pattern, options...) @@ -99,6 +107,8 @@ func (i searchContext) searchByGlob(request searchRequest, options ...LinkResolu } return refs, nil case searchByGlob: + log.WithFields("glob", request.value).Trace("glob provided is an expensive search, consider using a more specific indexed search") + options = append(options, FollowBasenameLinks) return i.tree.FilesByGlob(request.value, options...) 
} From 64955563302e5b438951b9f3f0970cda9578552b Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Mon, 30 Jan 2023 21:17:37 -0500 Subject: [PATCH 09/35] add limited support for glob classes and alternatives Signed-off-by: Alex Goodman --- pkg/filetree/glob_parser.go | 154 ++++++++++--- pkg/filetree/glob_parser_test.go | 365 +++++++++++++++++++++++-------- pkg/filetree/search.go | 11 +- 3 files changed, 404 insertions(+), 126 deletions(-) diff --git a/pkg/filetree/glob_parser.go b/pkg/filetree/glob_parser.go index 59cb0a96..0ef8dcc6 100644 --- a/pkg/filetree/glob_parser.go +++ b/pkg/filetree/glob_parser.go @@ -37,38 +37,60 @@ type searchRequest struct { requirement string } -func parseGlob(glob string) searchRequest { +func parseGlob(glob string) []searchRequest { glob = cleanGlob(glob) - if !strings.ContainsAny(glob, "*?") { - return searchRequest{ - searchBasis: searchByPath, - value: glob, + if !strings.ContainsAny(glob, "*?[]{}") { + return []searchRequest{ + { + searchBasis: searchByPath, + value: glob, + }, } } + beforeBasename, basename := splitAtBasename(glob) + requests := parseGlobBasename(basename) + for i := range requests { + applyRequirement(&requests[i], beforeBasename, glob) + } + + return requests +} + +func splitAtBasename(glob string) (string, string) { + // TODO: need to correctly avoid indexes within [] and {} groups basenameSplitAt := strings.LastIndex(glob, "/") var basename string + var beforeBasename string if basenameSplitAt == -1 { // note: this has no glob path prefix, thus no requirement... 
// this can only be a basename, basename glob, or extension basename = glob + beforeBasename = "" } else if basenameSplitAt < len(glob)-1 { basename = glob[basenameSplitAt+1:] } - request := parseGlobBasename(basename) + if basenameSplitAt >= 0 && basenameSplitAt < len(glob)-1 { + beforeBasename = glob[:basenameSplitAt] + } - requirement := glob - if basenameSplitAt == -1 { - requirement = "" - } else if basenameSplitAt < len(glob)-1 { - requirementSection := glob[:basenameSplitAt] - switch requirementSection { + return beforeBasename, basename +} + +func applyRequirement(request *searchRequest, beforeBasename, glob string) { + var requirement string + + if beforeBasename != "" { + requirement = glob + switch beforeBasename { case "**", request.requirement: requirement = "" } + } else { + requirement = "" } request.requirement = requirement @@ -79,41 +101,115 @@ func parseGlob(glob string) searchRequest { request.requirement = "" } } - - return request } -func parseGlobBasename(input string) searchRequest { - extensionFields := strings.Split(input, "*.") +func parseGlobBasename(basenameInput string) []searchRequest { + if strings.ContainsAny(basenameInput, "[]{}") { + return parseBasenameAltAndClassGlobSections(basenameInput) + } + + extensionFields := strings.Split(basenameInput, "*.") if len(extensionFields) == 2 && extensionFields[0] == "" { possibleExtension := extensionFields[1] if !strings.ContainsAny(possibleExtension, "*?") { // special case, this is plain extension - return searchRequest{ - searchBasis: searchByExtension, - value: "." + possibleExtension, + return []searchRequest{ + { + searchBasis: searchByExtension, + value: "." 
+ possibleExtension, + }, } } } - if !strings.ContainsAny(input, "*?") { + if !strings.ContainsAny(basenameInput, "*?") { // special case, this is plain extension - return searchRequest{ - searchBasis: searchByBasename, - value: input, + return []searchRequest{ + { + searchBasis: searchByBasename, + value: basenameInput, + }, } } - if strings.ReplaceAll(strings.ReplaceAll(input, "?", ""), "*", "") == "" { + if strings.ReplaceAll(strings.ReplaceAll(basenameInput, "?", ""), "*", "") == "" { // special case, this is a glob that is only asterisks... do not process! - return searchRequest{ - searchBasis: searchByGlob, + return []searchRequest{ + { + searchBasis: searchByGlob, + // note: we let the parent caller attach the full glob value + }, + } + } + + return []searchRequest{ + { + searchBasis: searchByBasenameGlob, + value: basenameInput, + }, + } +} + +func parseBasenameAltAndClassGlobSections(basenameInput string) []searchRequest { + // TODO: process escape sequences + + altStartCount := strings.Count(basenameInput, "{") + altEndCount := strings.Count(basenameInput, "}") + classStartCount := strings.Count(basenameInput, "[") + classEndCount := strings.Count(basenameInput, "]") + + if altStartCount != altEndCount || classStartCount != classEndCount { + // imbalanced braces, this is not a valid glob relative to just the basename + return []searchRequest{ + { + searchBasis: searchByGlob, + // note: we let the parent caller attach the full glob value + }, + } + } + + if classStartCount > 0 { + // parsing this is not supported at this time + return []searchRequest{ + { + searchBasis: searchByBasenameGlob, + value: basenameInput, + }, + } + } + + // if the glob is the simplest list form, them allow for breaking into sub-searches + if altStartCount == 1 { + indexStartIsPrefix := strings.Index(basenameInput, "{") == 0 + indexEndIsSuffix := strings.Index(basenameInput, "}") == len(basenameInput)-1 + if indexStartIsPrefix && indexEndIsSuffix { + // this is a simple list, 
split it up + // e.g. {a,b,c} -> a, b, c + altSections := strings.Split(basenameInput[1:len(basenameInput)-1], ",") + if len(altSections) > 1 { + var requests []searchRequest + for _, altSection := range altSections { + basis := searchByBasename + if strings.ContainsAny(altSection, "*?") { + basis = searchByBasenameGlob + } + + requests = append(requests, searchRequest{ + searchBasis: basis, + value: altSection, + }) + } + return requests + } } } - return searchRequest{ - searchBasis: searchByBasenameGlob, - value: input, + // there is some sort of alt usage, but it is not a simple list... just treat it as a glob + return []searchRequest{ + { + searchBasis: searchByBasenameGlob, + value: basenameInput, + }, } } diff --git a/pkg/filetree/glob_parser_test.go b/pkg/filetree/glob_parser_test.go index 53ea0208..b3727cf0 100644 --- a/pkg/filetree/glob_parser_test.go +++ b/pkg/filetree/glob_parser_test.go @@ -10,174 +10,309 @@ func Test_parseGlob(t *testing.T) { tests := []struct { name string glob string - want searchRequest + want []searchRequest }{ { name: "relative path", glob: "foo/bar/basename.txt", - want: searchRequest{ - searchBasis: searchByPath, - value: "foo/bar/basename.txt", + want: []searchRequest{ + { + searchBasis: searchByPath, + value: "foo/bar/basename.txt", + }, }, }, { name: "absolute path", glob: "/foo/bar/basename.txt", - want: searchRequest{ - searchBasis: searchByPath, - value: "/foo/bar/basename.txt", + want: []searchRequest{ + { + searchBasis: searchByPath, + value: "/foo/bar/basename.txt", + }, }, }, { name: "extension", glob: "*.txt", - want: searchRequest{ - searchBasis: searchByExtension, - value: ".txt", + want: []searchRequest{ + { + searchBasis: searchByExtension, + value: ".txt", + }, }, }, { name: "extension anywhere", glob: "**/*.txt", - want: searchRequest{ - searchBasis: searchByExtension, - value: ".txt", + want: []searchRequest{ + { + searchBasis: searchByExtension, + value: ".txt", + }, }, }, { name: "basename glob search with 
requirement", glob: "bas*nam?.txt", - want: searchRequest{ - searchBasis: searchByBasenameGlob, - value: "bas*nam?.txt", + want: []searchRequest{ + { + searchBasis: searchByBasenameGlob, + value: "bas*nam?.txt", + }, }, }, { name: "extension with path requirement", glob: "foo/bar/**/*.txt", - want: searchRequest{ - searchBasis: searchByExtension, - value: ".txt", - requirement: "foo/bar/**/*.txt", + want: []searchRequest{ + { + searchBasis: searchByExtension, + value: ".txt", + requirement: "foo/bar/**/*.txt", + }, }, }, { name: "basename but without a path prefix", glob: "basename.txt", - want: searchRequest{ - searchBasis: searchByPath, - value: "basename.txt", + want: []searchRequest{ + { + searchBasis: searchByPath, + value: "basename.txt", + }, }, }, { name: "basename anywhere", glob: "**/basename.txt", - want: searchRequest{ - searchBasis: searchByBasename, - value: "basename.txt", + want: []searchRequest{ + { + searchBasis: searchByBasename, + value: "basename.txt", + }, }, }, { name: "basename with requirement", glob: "foo/b*/basename.txt", - want: searchRequest{ - searchBasis: searchByBasename, - value: "basename.txt", - requirement: "foo/b*/basename.txt", + want: []searchRequest{ + { + searchBasis: searchByBasename, + value: "basename.txt", + requirement: "foo/b*/basename.txt", + }, }, }, { name: "basename glob", glob: "basename.*", - want: searchRequest{ - searchBasis: searchByBasenameGlob, - value: "basename.*", + want: []searchRequest{ + { + searchBasis: searchByBasenameGlob, + value: "basename.*", + }, }, }, { name: "basename glob with requirement", glob: "**/foo/bar/basename.*", - want: searchRequest{ - searchBasis: searchByBasenameGlob, - value: "basename.*", - requirement: "**/foo/bar/basename.*", + want: []searchRequest{ + { + searchBasis: searchByBasenameGlob, + value: "basename.*", + requirement: "**/foo/bar/basename.*", + }, }, }, { name: "basename wildcard glob with requirement", glob: "**/foo/bar/basenam?.txt", - want: searchRequest{ - 
searchBasis: searchByBasenameGlob, - value: "basenam?.txt", - requirement: "**/foo/bar/basenam?.txt", + want: []searchRequest{ + { + searchBasis: searchByBasenameGlob, + value: "basenam?.txt", + requirement: "**/foo/bar/basenam?.txt", + }, + }, + }, + { + name: "glob classes within a basename", + glob: "**/foo/bar/basena[me][me].txt", + want: []searchRequest{ + { + searchBasis: searchByBasenameGlob, + value: "basena[me][me].txt", + requirement: "**/foo/bar/basena[me][me].txt", + }, + }, + }, + { + name: "glob classes within a the path", + glob: "**/foo/[bB]ar/basena[me][me].txt", + want: []searchRequest{ + { + searchBasis: searchByBasenameGlob, + value: "basena[me][me].txt", + requirement: "**/foo/[bB]ar/basena[me][me].txt", + }, + }, + }, + { + name: "alt clobbers basename extraction", + glob: "**/foo/bar/{nested/basena[me][me].txt,another.txt}", + want: []searchRequest{ + { + searchBasis: searchByGlob, + value: "**/foo/bar/{nested/basena[me][me].txt,another.txt}", + }, + }, + }, + { + name: "class clobbers basename extraction", + glob: "**/foo/bar/[me][m/e].txt,another.txt", + want: []searchRequest{ + { + searchBasis: searchByGlob, + value: "**/foo/bar/[me][m/e].txt,another.txt", + }, + }, + }, + { + name: "match alternative matches in the basename", + glob: "**/var/lib/rpm/{Packages,Packages.db,rpmdb.sqlite}", + want: []searchRequest{ + { + searchBasis: searchByBasename, + value: "Packages", + requirement: "**/var/lib/rpm/{Packages,Packages.db,rpmdb.sqlite}", + }, + { + searchBasis: searchByBasename, + value: "Packages.db", + requirement: "**/var/lib/rpm/{Packages,Packages.db,rpmdb.sqlite}", + }, + { + searchBasis: searchByBasename, + value: "rpmdb.sqlite", + requirement: "**/var/lib/rpm/{Packages,Packages.db,rpmdb.sqlite}", + }, + }, + }, + { + name: "match fallback to glob search on non-simple alternatives", + glob: "**/var/lib/rpm/{Packa{ges}{GES},Packages.db,rpmdb.sqlite}", + want: []searchRequest{ + { + searchBasis: searchByBasenameGlob, + value: 
"{Packa{ges}{GES},Packages.db,rpmdb.sqlite}", + requirement: "**/var/lib/rpm/{Packa{ges}{GES},Packages.db,rpmdb.sqlite}", + }, + }, + }, + { + name: "dynamic extraction of basename and basename glob for alternatives", + glob: "**/var/lib/rpm/{Pack???s,Packages.db,rpm*.sqlite}", + want: []searchRequest{ + { + searchBasis: searchByBasenameGlob, + value: "Pack???s", + requirement: "**/var/lib/rpm/{Pack???s,Packages.db,rpm*.sqlite}", + }, + { + searchBasis: searchByBasename, + value: "Packages.db", + requirement: "**/var/lib/rpm/{Pack???s,Packages.db,rpm*.sqlite}", + }, + { + searchBasis: searchByBasenameGlob, + value: "rpm*.sqlite", + requirement: "**/var/lib/rpm/{Pack???s,Packages.db,rpm*.sqlite}", + }, }, }, { name: "fallback to full glob search", glob: "**/foo/bar/*", - want: searchRequest{ - searchBasis: searchByGlob, - value: "**/foo/bar/*", + want: []searchRequest{ + { + searchBasis: searchByGlob, + value: "**/foo/bar/*", + }, }, }, // edge cases { name: "empty string", glob: "", - want: searchRequest{ - searchBasis: searchByPath, + want: []searchRequest{ + { + searchBasis: searchByPath, + }, }, }, { name: "only a slash", glob: "/", - want: searchRequest{ - searchBasis: searchByPath, - value: "/", + want: []searchRequest{ + { + searchBasis: searchByPath, + value: "/", + }, }, }, { name: "cleanup to single slash", glob: "///", - want: searchRequest{ - searchBasis: searchByPath, - value: "/", + want: []searchRequest{ + { + searchBasis: searchByPath, + value: "/", + }, }, }, { name: "ends with slash", glob: "/foo/b*r/", - want: searchRequest{ - searchBasis: searchByBasenameGlob, - value: "b*r", - requirement: "/foo/b*r", // note that the slash is removed since this should be a clean path + want: []searchRequest{ + { + searchBasis: searchByBasenameGlob, + value: "b*r", + requirement: "/foo/b*r", // note that the slash is removed since this should be a clean path + }, }, }, { name: "spaces around everything", glob: " /foo/b*r/ .txt ", - want: searchRequest{ - 
searchBasis: searchByBasename, - value: " .txt", // note the space - requirement: "/foo/b*r/ .txt", // note the space in the middle, but otherwise clean on the front and back + want: []searchRequest{ + { + searchBasis: searchByBasename, + value: " .txt", // note the space + requirement: "/foo/b*r/ .txt", // note the space in the middle, but otherwise clean on the front and back + }, }, }, { name: "fallback to full glob search", glob: "**/foo/bar/***.*****.******", - want: searchRequest{ - searchBasis: searchByBasenameGlob, - value: "*.*.*", // note that the basename glob is cleaned up - requirement: "**/foo/bar/*.*.*", // note that the glob is cleaned up + want: []searchRequest{ + { + searchBasis: searchByBasenameGlob, + value: "*.*.*", // note that the basename glob is cleaned up + requirement: "**/foo/bar/*.*.*", // note that the glob is cleaned up + }, }, }, { name: "odd glob input still honors basename searches", glob: "**/foo/**.***.****bar/***thin*.txt", - want: searchRequest{ - searchBasis: searchByBasenameGlob, - value: "*thin*.txt", // note that the basename glob is cleaned up - requirement: "**/foo/*.*.*bar/*thin*.txt", // note that the glob is cleaned up + want: []searchRequest{ + { + searchBasis: searchByBasenameGlob, + value: "*thin*.txt", // note that the basename glob is cleaned up + requirement: "**/foo/*.*.*bar/*thin*.txt", // note that the glob is cleaned up + }, }, }, } @@ -192,83 +327,121 @@ func Test_parseGlobBasename(t *testing.T) { tests := []struct { name string input string - want searchRequest + want []searchRequest }{ { name: "empty string", input: "", - want: searchRequest{ - searchBasis: searchByBasename, + want: []searchRequest{ + { + searchBasis: searchByBasename, + }, }, }, { name: "everything-ish", input: "*?", - want: searchRequest{ - searchBasis: searchByGlob, + want: []searchRequest{ + { + searchBasis: searchByGlob, + }, }, }, { name: "everything recursive", input: "**", - want: searchRequest{ - searchBasis: searchByGlob, + want: 
[]searchRequest{ + { + searchBasis: searchByGlob, + }, }, }, { name: "simple basename", input: "basename.txt", - want: searchRequest{ - searchBasis: searchByBasename, - value: "basename.txt", + want: []searchRequest{ + { + searchBasis: searchByBasename, + value: "basename.txt", + }, }, }, { name: "basename with prefix glob", input: "*basename.txt", - want: searchRequest{ - searchBasis: searchByBasenameGlob, - value: "*basename.txt", + want: []searchRequest{ + { + searchBasis: searchByBasenameGlob, + value: "*basename.txt", + }, }, }, { name: "basename with pattern", input: "bas*nam?.txt", - want: searchRequest{ - searchBasis: searchByBasenameGlob, - value: "bas*nam?.txt", + want: []searchRequest{ + { + searchBasis: searchByBasenameGlob, + value: "bas*nam?.txt", + }, }, }, { name: "extension", input: "*.txt", - want: searchRequest{ - searchBasis: searchByExtension, - value: ".txt", + want: []searchRequest{ + { + searchBasis: searchByExtension, + value: ".txt", + }, }, }, { name: "possible extension that should be searched by glob", input: "*.*.txt", - want: searchRequest{ - searchBasis: searchByBasenameGlob, - value: "*.*.txt", + want: []searchRequest{ + { + searchBasis: searchByBasenameGlob, + value: "*.*.txt", + }, }, }, { name: "tricky basename", input: ".txt", - want: searchRequest{ - searchBasis: searchByBasename, - value: ".txt", + want: []searchRequest{ + { + searchBasis: searchByBasename, + value: ".txt", + }, }, }, { name: "basename glob with extension", input: "*thin*.txt", - want: searchRequest{ - searchBasis: searchByBasenameGlob, - value: "*thin*.txt", + want: []searchRequest{ + { + searchBasis: searchByBasenameGlob, + value: "*thin*.txt", + }, + }, + }, + { + name: "basename alternates", + input: "{Packages,Packages.db,rpmdb.sqlite}", + want: []searchRequest{ + { + searchBasis: searchByBasename, + value: "Packages", + }, + { + searchBasis: searchByBasename, + value: "Packages.db", + }, + { + searchBasis: searchByBasename, + value: "rpmdb.sqlite", + }, 
}, }, } diff --git a/pkg/filetree/search.go b/pkg/filetree/search.go index effb9c43..09bdd32c 100644 --- a/pkg/filetree/search.go +++ b/pkg/filetree/search.go @@ -61,7 +61,16 @@ func (i searchContext) SearchByGlob(pattern string, options ...LinkResolutionOpt return i.tree.FilesByGlob(pattern, options...) } - return i.searchByGlob(parseGlob(pattern), options...) + var allRefs []file.ReferenceAccessVia + for _, request := range parseGlob(pattern) { + refs, err := i.searchByGlob(request, options...) + if err != nil { + return nil, fmt.Errorf("unable to search by glob=%q: %w", pattern, err) + } + allRefs = append(allRefs, refs...) + } + + return allRefs, nil } func (i searchContext) searchByGlob(request searchRequest, options ...LinkResolutionOption) ([]file.ReferenceAccessVia, error) { From d9f67326463e97087bddd1a50aceba16d23f82f1 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Tue, 31 Jan 2023 09:17:49 -0500 Subject: [PATCH 10/35] add failing test to show that index shortcircuits correct behavior Signed-off-by: Alex Goodman --- pkg/filetree/search_test.go | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/pkg/filetree/search_test.go b/pkg/filetree/search_test.go index 766abfcd..6f79f1d5 100644 --- a/pkg/filetree/search_test.go +++ b/pkg/filetree/search_test.go @@ -100,7 +100,11 @@ func Test_searchContext_SearchByGlob(t *testing.T) { } tree := NewFileTree() - ref, err := tree.AddFile("/path/to/file.txt") + ref, err := tree.AddSymLink("/link-to-path", "/path") + require.NoError(t, err) + require.NotNil(t, ref) + + ref, err = tree.AddFile("/path/to/file.txt") require.NoError(t, err) require.NotNil(t, ref) @@ -135,6 +139,25 @@ func Test_searchContext_SearchByGlob(t *testing.T) { }, }, }, + }, { + name: "virtual path exists", + fields: defaultFields, + args: args{ + // note: this is a glob through a symlink (ancestor). If not using the index, this will work + // just fine, since we do a full tree search. 
However, if using the index, this shortcut will + // dodge any ancestor symlink and will not find the file. + glob: "**/link-to-path/to/file.txt", + }, + want: []file.ReferenceAccessVia{ + { + ReferenceAccess: file.ReferenceAccess{ + RequestPath: "/link-to-path/to/file.txt", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", + }, + }, + }, + }, }, { name: "path does not exists", From 61da85ba2ae6c371a35866fcd50217385d943585 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Wed, 1 Feb 2023 11:07:08 -0500 Subject: [PATCH 11/35] add link resolution via filetree search context Signed-off-by: Alex Goodman --- pkg/file/id.go | 94 +++++ pkg/file/path_set.go | 58 ++- pkg/file/reference.go | 5 - pkg/filetree/filenode/filenode.go | 19 + pkg/filetree/filetree.go | 82 ++-- pkg/filetree/filetree_test.go | 37 +- pkg/filetree/index.go | 145 ++++--- pkg/filetree/search.go | 309 ++++++++++++--- pkg/filetree/search_test.go | 602 +++++++++++++++++++++++++++++- pkg/tree/depth_first_walker.go | 2 +- pkg/tree/node/id.go | 62 ++- pkg/tree/tree.go | 6 +- 12 files changed, 1270 insertions(+), 151 deletions(-) create mode 100644 pkg/file/id.go diff --git a/pkg/file/id.go b/pkg/file/id.go new file mode 100644 index 00000000..a6366bcf --- /dev/null +++ b/pkg/file/id.go @@ -0,0 +1,94 @@ +package file + +import "sort" + +var nextID = 0 // note: this is governed by the reference constructor + +// ID is used for file tree manipulation to uniquely identify tree nodes. 
+type ID uint64 + +type IDs []ID + +func (ids IDs) Len() int { + return len(ids) +} + +func (ids IDs) Less(i, j int) bool { + return ids[i] < ids[j] +} + +func (ids IDs) Swap(i, j int) { + ids[i], ids[j] = ids[j], ids[i] +} + +type IDSet map[ID]struct{} + +func NewIDSet() IDSet { + return make(IDSet) +} + +func (s IDSet) Size() int { + return len(s) +} + +func (s IDSet) Merge(other IDSet) { + for i := range other.Enumerate() { + s.Add(i) + } +} + +func (s IDSet) Add(ids ...ID) { + for _, i := range ids { + s[i] = struct{}{} + } +} + +func (s IDSet) Remove(ids ...ID) { + for _, i := range ids { + delete(s, i) + } +} + +func (s IDSet) Contains(i ID) bool { + _, ok := s[i] + return ok +} + +func (s IDSet) Clear() { + // TODO: replace this with the new 'clear' keyword when it's available in go 1.20 or 1.21 + for i := range s { + delete(s, i) + } +} + +func (s IDSet) List() []ID { + ret := make([]ID, 0, len(s)) + for i := range s { + ret = append(ret, i) + } + + sort.Sort(IDs(ret)) + + return ret +} + +func (s IDSet) Enumerate() <-chan ID { + ret := make(chan ID) + go func() { + defer close(ret) + for i := range s { + ret <- i + } + }() + return ret +} + +func (s IDSet) ContainsAny(ids ...ID) bool { + for _, i := range ids { + _, ok := s[i] + if ok { + return true + } + } + return false +} diff --git a/pkg/file/path_set.go b/pkg/file/path_set.go index fe7280fc..fedc3e07 100644 --- a/pkg/file/path_set.go +++ b/pkg/file/path_set.go @@ -6,15 +6,65 @@ func NewPathSet() PathSet { return make(PathSet) } -func (s PathSet) Add(i Path) { - s[i] = struct{}{} +func (s PathSet) Size() int { + return len(s) } -func (s PathSet) Remove(i Path) { - delete(s, i) +func (s PathSet) Merge(other PathSet) { + for i := range other.Enumerate() { + s.Add(i) + } +} + +func (s PathSet) Add(ids ...Path) { + for _, i := range ids { + s[i] = struct{}{} + } +} + +func (s PathSet) Remove(ids ...Path) { + for _, i := range ids { + delete(s, i) + } } func (s PathSet) Contains(i Path) bool { _, ok := 
s[i] return ok } + +func (s PathSet) Clear() { + // TODO: replace this with the new 'clear' keyword when it's available in go 1.20 or 1.21 + for i := range s { + delete(s, i) + } +} + +func (s PathSet) List() []Path { + ret := make([]Path, 0, len(s)) + for i := range s { + ret = append(ret, i) + } + return ret +} + +func (s PathSet) Enumerate() <-chan Path { + ret := make(chan Path) + go func() { + defer close(ret) + for i := range s { + ret <- i + } + }() + return ret +} + +func (s PathSet) ContainsAny(ids ...Path) bool { + for _, i := range ids { + _, ok := s[i] + if ok { + return true + } + } + return false +} diff --git a/pkg/file/reference.go b/pkg/file/reference.go index 904cd022..ca2c12ff 100644 --- a/pkg/file/reference.go +++ b/pkg/file/reference.go @@ -7,11 +7,6 @@ import ( "github.com/scylladb/go-set/strset" ) -var nextID = 0 - -// ID is used for file tree manipulation to uniquely identify tree nodes. -type ID uint64 - // ReferenceAccess represents the fetching of a possibly non-existent file, and how it was accessed. type ReferenceAccess struct { RequestPath Path diff --git a/pkg/filetree/filenode/filenode.go b/pkg/filetree/filenode/filenode.go index aa9b0fdf..ba2dcb3c 100644 --- a/pkg/filetree/filenode/filenode.go +++ b/pkg/filetree/filenode/filenode.go @@ -2,6 +2,7 @@ package filenode import ( "path" + "path/filepath" "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/stereoscope/pkg/tree/node" @@ -70,3 +71,21 @@ func (n *FileNode) IsLink() bool { func IDByPath(p file.Path) node.ID { return node.ID(p) } + +func (n *FileNode) RenderLinkDestination() file.Path { + if !n.IsLink() { + return "" + } + + if n.LinkPath.IsAbsolutePath() { + // use links with absolute paths blindly + return n.LinkPath + } + + // resolve relative link paths + var parentDir string + parentDir, _ = filepath.Split(string(n.RealPath)) // TODO: alex: should this be path.Split, not filepath.Split? 
+ + // assemble relative link path by normalizing: "/cur/dir/../file1.txt" --> "/cur/file1.txt" + return file.Path(path.Clean(path.Join(parentDir, string(n.LinkPath)))) +} diff --git a/pkg/filetree/filetree.go b/pkg/filetree/filetree.go index 2dfb9e6e..555e736e 100644 --- a/pkg/filetree/filetree.go +++ b/pkg/filetree/filetree.go @@ -4,7 +4,6 @@ import ( "errors" "fmt" "path" - "path/filepath" "strings" "github.com/anchore/stereoscope/internal" @@ -50,6 +49,36 @@ func (na *nodeAccess) HasFileNode() bool { return na.FileNode != nil } +func (na *nodeAccess) FileReferenceVia() *file.ReferenceAccessVia { + if !na.HasFileNode() { + return nil + } + return file.NewFileReferenceVia( + na.RequestPath, + na.FileNode.Reference, + newReferenceAccessPath(na.LeafLinkResolution), + ) +} + +func (na *nodeAccess) References() []file.Reference { + if !na.HasFileNode() { + return nil + } + var refs []file.Reference + + if na.FileNode.Reference != nil { + refs = append(refs, *na.FileNode.Reference) + } + + for _, l := range na.LeafLinkResolution { + if l.HasFileNode() && l.FileNode.Reference != nil { + refs = append(refs, *l.FileNode.Reference) + } + } + + return refs +} + // FileTree represents a file/directory Tree type FileTree struct { tree *tree.Tree @@ -145,6 +174,18 @@ func (t *FileTree) ListPaths(dir file.Path) ([]file.Path, error) { // File fetches a file.Reference for the given path. Returns nil if the path does not exist in the FileTree. func (t *FileTree) File(path file.Path, options ...LinkResolutionOption) (bool, *file.ReferenceAccessVia, error) { + currentNode, err := t.file(path, options...) + if err != nil { + return false, nil, err + } + if currentNode.HasFileNode() { + return true, currentNode.FileReferenceVia(), err + } + return false, nil, err +} + +// file fetches a file.Reference for the given path. Returns nil if the path does not exist in the FileTree. 
+func (t *FileTree) file(path file.Path, options ...LinkResolutionOption) (*nodeAccess, error) { userStrategy := newLinkResolutionStrategy(options...) // For: /some/path/here // Where: /some/path -> /other/place @@ -165,14 +206,10 @@ func (t *FileTree) File(path file.Path, options ...LinkResolutionOption) (bool, // hit, return it! If not, fallback to symlink resolution. currentNode, err := t.node(path, linkResolutionStrategy{}) if err != nil { - return false, nil, err + return nil, err } if currentNode.HasFileNode() && (!currentNode.FileNode.IsLink() || currentNode.FileNode.IsLink() && !userStrategy.FollowBasenameLinks) { - return true, file.NewFileReferenceVia( - path, - currentNode.FileNode.Reference, - newReferenceAccessPath(currentNode.LeafLinkResolution), - ), nil + return currentNode, nil } // symlink resolution!... within the context of container images (which is outside of the responsibility of this object) @@ -184,13 +221,9 @@ func (t *FileTree) File(path file.Path, options ...LinkResolutionOption) (bool, DoNotFollowDeadBasenameLinks: userStrategy.DoNotFollowDeadBasenameLinks, }) if currentNode.HasFileNode() { - return true, file.NewFileReferenceVia( - path, - currentNode.FileNode.Reference, - newReferenceAccessPath(currentNode.LeafLinkResolution), - ), err + return currentNode, err } - return false, nil, err + return nil, err } func newReferenceAccessPath(nodePath []nodeAccess) []file.ReferenceAccess { @@ -234,6 +267,9 @@ func (t *FileTree) node(p file.Path, strategy linkResolutionStrategy) (*nodeAcce if strategy.FollowAncestorLinks { currentNode, err = t.resolveAncestorLinks(normalizedPath, nil) if err != nil { + if currentNode != nil { + currentNode.RequestPath = normalizedPath + } return currentNode, err } } else { @@ -248,12 +284,19 @@ func (t *FileTree) node(p file.Path, strategy linkResolutionStrategy) (*nodeAcce // link resolution has come up with nothing, return what we have so far if !currentNode.HasFileNode() { + if currentNode != nil { + 
currentNode.RequestPath = normalizedPath + } return currentNode, nil } if strategy.FollowBasenameLinks { currentNode, err = t.resolveNodeLinks(currentNode, !strategy.DoNotFollowDeadBasenameLinks, nil) } + if currentNode != nil { + currentNode.RequestPath = normalizedPath + } + return currentNode, err } @@ -331,7 +374,7 @@ func (t *FileTree) resolveAncestorLinks(path file.Path, attemptedPaths internal. return currentNodeAccess, nil } -// followNode takes the given FileNode and resolves all links at the base of the real path for the node (this implies +// resolveNodeLinks takes the given FileNode and resolves all links at the base of the real path for the node (this implies // that NO ancestors are considered). // nolint: funlen func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, attemptedPaths internal.Set) (*nodeAccess, error) { @@ -379,16 +422,7 @@ func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, // prepare for the next iteration alreadySeen.Add(string(currentNodeAccess.FileNode.RealPath)) - if currentNodeAccess.FileNode.LinkPath.IsAbsolutePath() { - // use links with absolute paths blindly - nextPath = currentNodeAccess.FileNode.LinkPath - } else { - // resolve relative link paths - var parentDir string - parentDir, _ = filepath.Split(string(currentNodeAccess.FileNode.RealPath)) - // assemble relative link path by normalizing: "/cur/dir/../file1.txt" --> "/cur/file1.txt" - nextPath = file.Path(path.Clean(path.Join(parentDir, string(currentNodeAccess.FileNode.LinkPath)))) - } + nextPath = currentNodeAccess.FileNode.RenderLinkDestination() // no more links to follow if string(nextPath) == "" { diff --git a/pkg/filetree/filetree_test.go b/pkg/filetree/filetree_test.go index 657942bd..987958d3 100644 --- a/pkg/filetree/filetree_test.go +++ b/pkg/filetree/filetree_test.go @@ -2,7 +2,6 @@ package filetree import ( "errors" - "fmt" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" 
"github.com/stretchr/testify/require" @@ -87,6 +86,40 @@ func TestFileTree_RemovePath(t *testing.T) { } } +func TestFileTree_FilesByGlob_AncestorSymlink(t *testing.T) { + var err error + tr := NewFileTree() + + _, err = tr.AddSymLink("/parent-link", "/parent") + require.NoError(t, err) + + _, err = tr.AddDir("/parent") + require.NoError(t, err) + + expectedRef, err := tr.AddFile("/parent/file.txt") + require.NoError(t, err) + + expected := []file.ReferenceAccessVia{ + { + ReferenceAccess: file.ReferenceAccess{ + RequestPath: "/parent-link/file.txt", + Reference: expectedRef, + }, + LeafLinkResolution: nil, + }, + } + + requestGlob := "**/parent-link/file.txt" + linkOptions := []LinkResolutionOption{FollowBasenameLinks} + ref, err := tr.FilesByGlob(requestGlob, linkOptions...) + require.NoError(t, err) + + opt := cmp.AllowUnexported(file.Reference{}) + if d := cmp.Diff(expected, ref, opt); d != "" { + t.Errorf("unexpected file reference (-want +got):\n%s", d) + } +} + func TestFileTree_FilesByGlob(t *testing.T) { tr := NewFileTree() @@ -954,7 +987,7 @@ func TestFileTree_File_Symlink(t *testing.T) { } for _, test := range tests { - t.Run(fmt.Sprintf("%s (follow=%+v)", test.name, test.linkOptions), func(t *testing.T) { + t.Run(test.name, func(t *testing.T) { tr := NewFileTree() _, err := tr.AddSymLink(test.buildLinkSource, test.buildLinkDest) if err != nil { diff --git a/pkg/filetree/index.go b/pkg/filetree/index.go index 89e74ee7..e5e7369e 100644 --- a/pkg/filetree/index.go +++ b/pkg/filetree/index.go @@ -8,6 +8,8 @@ import ( "strings" "sync" + "github.com/anchore/stereoscope/internal/log" + "github.com/anchore/stereoscope/pkg/file" "github.com/becheran/wildmatch-go" "github.com/scylladb/go-set/strset" @@ -21,9 +23,10 @@ type Index interface { type IndexReader interface { Exists(f file.Reference) bool Get(f file.Reference) (IndexEntry, error) - GetByMIMEType(mType string) ([]IndexEntry, error) - GetByExtension(extension string) ([]IndexEntry, error) - 
GetByBasename(basename string) ([]IndexEntry, error) + GetByMIMEType(mTypes ...string) ([]IndexEntry, error) + GetByFileType(fTypes ...file.Type) ([]IndexEntry, error) + GetByExtension(extensions ...string) ([]IndexEntry, error) + GetByBasename(basenames ...string) ([]IndexEntry, error) GetByBasenameGlob(globs ...string) ([]IndexEntry, error) Basenames() []string } @@ -37,9 +40,10 @@ type IndexWriter interface { type index struct { *sync.RWMutex index map[file.ID]IndexEntry - byMIMEType map[string][]file.ID - byExtension map[string][]file.ID - byBasename map[string][]file.ID + byFileType map[file.Type]file.IDSet + byMIMEType map[string]file.IDSet + byExtension map[string]file.IDSet + byBasename map[string]file.IDSet basenames *strset.Set } @@ -48,9 +52,10 @@ func NewIndex() Index { return &index{ RWMutex: &sync.RWMutex{}, index: make(map[file.ID]IndexEntry), - byMIMEType: make(map[string][]file.ID), - byExtension: make(map[string][]file.ID), - byBasename: make(map[string][]file.ID), + byFileType: make(map[file.Type]file.IDSet), + byMIMEType: make(map[string]file.IDSet), + byExtension: make(map[string]file.IDSet), + byBasename: make(map[string]file.IDSet), basenames: strset.New(), } } @@ -66,22 +71,43 @@ type IndexEntry struct { func (c *index) Add(f file.Reference, m file.Metadata) { c.Lock() defer c.Unlock() + id := f.ID() + if _, ok := c.index[id]; ok { + log.WithFields("id", id, "path", f.RealPath).Debug("overwriting existing file index entry") + } + if m.MIMEType != "" { + if _, ok := c.byMIMEType[m.MIMEType]; !ok { + c.byMIMEType[m.MIMEType] = file.NewIDSet() + } // an empty MIME type means that we didn't have the contents of the file to determine the MIME type. If we have // the contents and the MIME type could not be determined then the default value is application/octet-stream. 
- c.byMIMEType[m.MIMEType] = append(c.byMIMEType[m.MIMEType], id) + c.byMIMEType[m.MIMEType].Add(id) } basename := path.Base(string(f.RealPath)) - c.byBasename[basename] = append(c.byBasename[basename], id) + + if _, ok := c.byBasename[basename]; !ok { + c.byBasename[basename] = file.NewIDSet() + } + + c.byBasename[basename].Add(id) c.basenames.Add(basename) for _, ext := range fileExtensions(string(f.RealPath)) { - c.byExtension[ext] = append(c.byExtension[ext], id) + if _, ok := c.byExtension[ext]; !ok { + c.byExtension[ext] = file.NewIDSet() + } + c.byExtension[ext].Add(id) } + if _, ok := c.byFileType[m.Type]; !ok { + c.byFileType[m.Type] = file.NewIDSet() + } + c.byFileType[m.Type].Add(id) + c.index[id] = IndexEntry{ Reference: f, Metadata: m, @@ -118,68 +144,101 @@ func (c *index) Basenames() []string { return bns } -func (c *index) GetByMIMEType(mType string) ([]IndexEntry, error) { +func (c *index) GetByFileType(fTypes ...file.Type) ([]IndexEntry, error) { c.RLock() defer c.RUnlock() - fileIDs, ok := c.byMIMEType[mType] - if !ok { - return nil, nil - } - var entries []IndexEntry - for _, id := range fileIDs { - entry, ok := c.index[id] + + for _, fType := range fTypes { + fileIDs, ok := c.byFileType[fType] if !ok { - return nil, os.ErrNotExist + continue + } + + for _, id := range fileIDs.List() { + entry, ok := c.index[id] + if !ok { + return nil, os.ErrNotExist + } + entries = append(entries, entry) } - entries = append(entries, entry) } return entries, nil } -func (c *index) GetByExtension(extension string) ([]IndexEntry, error) { +func (c *index) GetByMIMEType(mTypes ...string) ([]IndexEntry, error) { c.RLock() defer c.RUnlock() - fileIDs, ok := c.byExtension[extension] - if !ok { - return nil, nil - } - var entries []IndexEntry - for _, id := range fileIDs { - entry, ok := c.index[id] + + for _, mType := range mTypes { + fileIDs, ok := c.byMIMEType[mType] if !ok { - return nil, os.ErrNotExist + continue + } + + for _, id := range fileIDs.List() { + 
entry, ok := c.index[id] + if !ok { + return nil, os.ErrNotExist + } + entries = append(entries, entry) } - entries = append(entries, entry) } return entries, nil } -func (c *index) GetByBasename(basename string) ([]IndexEntry, error) { +func (c *index) GetByExtension(extensions ...string) ([]IndexEntry, error) { c.RLock() defer c.RUnlock() - if strings.Contains(basename, "/") { - return nil, fmt.Errorf("found directory separator in a basename") - } + var entries []IndexEntry - fileIDs, ok := c.byBasename[basename] - if !ok { - return nil, nil + for _, extension := range extensions { + fileIDs, ok := c.byExtension[extension] + if !ok { + continue + } + + for _, id := range fileIDs.List() { + entry, ok := c.index[id] + if !ok { + return nil, os.ErrNotExist + } + entries = append(entries, entry) + } } + return entries, nil +} + +func (c *index) GetByBasename(basenames ...string) ([]IndexEntry, error) { + c.RLock() + defer c.RUnlock() + var entries []IndexEntry - for _, id := range fileIDs { - entry, ok := c.index[id] + + for _, basename := range basenames { + if strings.Contains(basename, "/") { + return nil, fmt.Errorf("found directory separator in a basename") + } + + fileIDs, ok := c.byBasename[basename] if !ok { - return nil, os.ErrNotExist + continue + } + + for _, id := range fileIDs.List() { + entry, ok := c.index[id] + if !ok { + return nil, os.ErrNotExist + } + entries = append(entries, entry) } - entries = append(entries, entry) } return entries, nil diff --git a/pkg/filetree/search.go b/pkg/filetree/search.go index 09bdd32c..c94002dc 100644 --- a/pkg/filetree/search.go +++ b/pkg/filetree/search.go @@ -2,8 +2,12 @@ package filetree import ( "fmt" + "path" + "sort" "github.com/anchore/stereoscope/internal/log" + "github.com/anchore/stereoscope/pkg/filetree/filenode" + "github.com/anchore/stereoscope/pkg/tree/node" "github.com/anchore/stereoscope/pkg/file" "github.com/bmatcuk/doublestar/v4" @@ -17,53 +21,110 @@ type Searcher interface { } type searchContext 
struct { - tree Reader // this is the tree which all index search results are filtered against - index Index // this index is relative to one or more trees, not just necessarily one + tree *FileTree // this is the tree which all index search results are filtered against + index Index // this index is relative to one or more trees, not just necessarily one + + // the following enables correct link resolution when searching via the index + linkForwardRef map[node.ID]node.ID // {link-node-id: link-destination-node-id} + linkBackwardRefs map[node.ID]node.IDSet // {link-destination-node-id: str([link-node-id, ...])} + // allLinks node.IDSet // set([link-node-id, ...]) // all links, regardless of whether they are resolved or not + // unresolvedLinks node.IDSet // set([link-node-id, ...]) // we have not figured the forward/backward refs for these links yet +} + +func NewSearchContext(tree *FileTree, index Index) Searcher { + c := &searchContext{ + tree: tree, + index: index, + linkForwardRef: make(map[node.ID]node.ID), + linkBackwardRefs: make(map[node.ID]node.IDSet), + } + + if err := c.buildLinkResolutionIndex(); err != nil { + log.WithFields("error", err).Warn("unable to build link resolution index for filetree search context") + } + + return c } -func NewSearchContext(tree Reader, index Index) Searcher { - return &searchContext{ - tree: tree, - index: index, +func (sc *searchContext) buildLinkResolutionIndex() error { + entries, err := sc.index.GetByFileType(file.TypeSymlink, file.TypeHardLink) + if err != nil { + return err + } + + // filter the results relative to the tree + nodes, err := sc.fileNodesInTree(entries) + if err != nil { + return err + } + + // note: the remaining references are all links that exist in the tree + + for _, fn := range nodes { + destinationFna, err := sc.tree.file(fn.RenderLinkDestination()) + if err != nil { + return fmt.Errorf("unable to get node for path=%q: %w", fn.RealPath, err) + } + + if !destinationFna.HasFileNode() { + // we were 
unable to resolve the link destination, this could be due to the fact that the destination simply does not exist in the tree + continue + } + + linkID := fn.ID() + destinationID := destinationFna.FileNode.ID() + + // add forward reference... + sc.linkForwardRef[linkID] = destinationID + + // add backward reference... + if _, ok := sc.linkBackwardRefs[destinationID]; !ok { + sc.linkBackwardRefs[destinationID] = node.NewIDSet() + } + sc.linkBackwardRefs[destinationID].Add(linkID) } + + return nil } -func (i searchContext) SearchByPath(path string, options ...LinkResolutionOption) (*file.ReferenceAccessVia, error) { +func (sc searchContext) SearchByPath(path string, options ...LinkResolutionOption) (*file.ReferenceAccessVia, error) { // TODO: one day this could leverage indexes outside of the tree, but today this is not implemented log.WithFields("path", path).Trace("searching filetree by path") options = append(options, FollowBasenameLinks) - _, ref, err := i.tree.File(file.Path(path), options...) + _, ref, err := sc.tree.File(file.Path(path), options...) return ref, err } -func (i searchContext) SearchByMIMEType(mimeTypes ...string) ([]file.ReferenceAccessVia, error) { +func (sc searchContext) SearchByMIMEType(mimeTypes ...string) ([]file.ReferenceAccessVia, error) { log.WithFields("types", mimeTypes).Trace("searching filetree by MIME types") var fileEntries []IndexEntry for _, mType := range mimeTypes { - entries, err := i.index.GetByMIMEType(mType) + entries, err := sc.index.GetByMIMEType(mType) if err != nil { return nil, fmt.Errorf("unable to fetch file references by MIME type (%q): %w", mType, err) } fileEntries = append(fileEntries, entries...)
} - return i.filterIndexEntriesRelativeToTree(fileEntries) + return sc.referencesInTree(fileEntries) } -func (i searchContext) SearchByGlob(pattern string, options ...LinkResolutionOption) ([]file.ReferenceAccessVia, error) { +// add case for status.d/* like things that hook up directly into filetree.ListPaths() + +func (sc searchContext) SearchByGlob(pattern string, options ...LinkResolutionOption) ([]file.ReferenceAccessVia, error) { log.WithFields("glob", pattern).Trace("searching filetree by glob") - if i.index == nil { + if sc.index == nil { options = append(options, FollowBasenameLinks) - return i.tree.FilesByGlob(pattern, options...) + return sc.tree.FilesByGlob(pattern, options...) } var allRefs []file.ReferenceAccessVia for _, request := range parseGlob(pattern) { - refs, err := i.searchByGlob(request, options...) + refs, err := sc.searchByGlob(request, options...) if err != nil { return nil, fmt.Errorf("unable to search by glob=%q: %w", pattern, err) } @@ -73,11 +134,11 @@ func (i searchContext) SearchByGlob(pattern string, options ...LinkResolutionOpt return allRefs, nil } -func (i searchContext) searchByGlob(request searchRequest, options ...LinkResolutionOption) ([]file.ReferenceAccessVia, error) { +func (sc searchContext) searchByGlob(request searchRequest, options ...LinkResolutionOption) ([]file.ReferenceAccessVia, error) { switch request.searchBasis { case searchByPath: options = append(options, FollowBasenameLinks) - ref, err := i.SearchByPath(request.value, options...) + ref, err := sc.SearchByPath(request.value, options...) 
if err != nil { return nil, err } @@ -86,31 +147,31 @@ func (i searchContext) searchByGlob(request searchRequest, options ...LinkResolu } return []file.ReferenceAccessVia{*ref}, nil case searchByBasename: - indexes, err := i.index.GetByBasename(request.value) + indexes, err := sc.index.GetByBasename(request.value) if err != nil { return nil, fmt.Errorf("unable to search by basename=%q: %w", request.value, err) } - refs, err := i.filterIndexEntries(request.requirement, indexes) + refs, err := sc.referencesWithRequirement(request.requirement, indexes) if err != nil { return nil, err } return refs, nil case searchByBasenameGlob: - indexes, err := i.index.GetByBasenameGlob(request.value) + indexes, err := sc.index.GetByBasenameGlob(request.value) if err != nil { return nil, fmt.Errorf("unable to search by basename-glob=%q: %w", request.value, err) } - refs, err := i.filterIndexEntries(request.requirement, indexes) + refs, err := sc.referencesWithRequirement(request.requirement, indexes) if err != nil { return nil, err } return refs, nil case searchByExtension: - indexes, err := i.index.GetByExtension(request.value) + indexes, err := sc.index.GetByExtension(request.value) if err != nil { return nil, fmt.Errorf("unable to search by extension=%q: %w", request.value, err) } - refs, err := i.filterIndexEntries(request.requirement, indexes) + refs, err := sc.referencesWithRequirement(request.requirement, indexes) if err != nil { return nil, err } @@ -119,64 +180,214 @@ func (i searchContext) searchByGlob(request searchRequest, options ...LinkResolu log.WithFields("glob", request.value).Trace("glob provided is an expensive search, consider using a more specific indexed search") options = append(options, FollowBasenameLinks) - return i.tree.FilesByGlob(request.value, options...) + return sc.tree.FilesByGlob(request.value, options...) 
} return nil, fmt.Errorf("invalid search request: %+v", request.searchBasis) } -func (i searchContext) filterIndexEntries(requirement string, entries []IndexEntry) ([]file.ReferenceAccessVia, error) { - refs, err := i.filterIndexEntriesRelativeToTree(entries) +func (sc searchContext) referencesWithRequirement(requirement string, entries []IndexEntry) ([]file.ReferenceAccessVia, error) { + refs, err := sc.referencesInTree(entries) if err != nil { return nil, err } + if requirement == "" { + return refs, nil + } + var results []file.ReferenceAccessVia for _, ref := range refs { - if requirement != "" { - var foundMatchingRequirement bool - for _, p := range ref.AllPaths() { - matched, err := doublestar.Match(requirement, string(p)) - if err != nil { - return nil, fmt.Errorf("unable to match glob pattern=%q to path=%q: %w", requirement, p, err) - } - if matched { - foundMatchingRequirement = true - break - } + var foundMatchingRequirement bool + allRefPaths := ref.AllPaths() + for _, p := range allRefPaths { + matched, err := doublestar.Match(requirement, string(p)) + if err != nil { + return nil, fmt.Errorf("unable to match glob pattern=%q to path=%q: %w", requirement, p, err) } - if !foundMatchingRequirement { - continue + if matched { + foundMatchingRequirement = true + break } } + if !foundMatchingRequirement { + continue + } results = append(results, ref) } return results, nil } -func (i searchContext) filterIndexEntriesRelativeToTree(fileEntries []IndexEntry) ([]file.ReferenceAccessVia, error) { - var refs []file.ReferenceAccessVia +func (sc searchContext) allPathsToNode(fn *filenode.FileNode) ([]file.Path, error) { + if fn == nil { + return nil, nil + } + + observedPaths := file.NewPathSet() + + paths, err := sc.pathsToNode(fn, "", observedPaths) + if err != nil { + return nil, err + } + + pathsList := paths.List() + sort.Sort(file.Paths(pathsList)) + + // TODO: filter to only paths that exist in the tree + + return pathsList, nil +} + +func (sc searchContext) 
pathsToNode(fn *filenode.FileNode, suffix string, observedPaths file.PathSet) (file.PathSet, error) { + if fn == nil { + return nil, nil + } + + if observedPaths != nil { + if observedPaths.Contains(fn.RealPath) { + return nil, fmt.Errorf("found circular reference to path=%q", fn.RealPath) + } + observedPaths.Add(fn.RealPath) + } + + paths := file.NewPathSet() + nodeID := fn.ID() + + addPath := func(suffix string, ps ...file.Path) { + for _, p := range ps { + if suffix != "" { + p = file.Path(path.Join(string(p), suffix)) + } + paths.Add(p) + } + } + + if suffix == "" { + addPath(suffix, fn.RealPath) + } + + // add all paths to the node that are linked to it + for linkSrcID := range sc.linkBackwardRefs[nodeID].Enumerate() { + pfn := sc.tree.tree.Node(linkSrcID) + if pfn == nil { + log.WithFields("id", nodeID, "parent", linkSrcID).Warn("found non-existent parent link") + continue + } + linkSrcPaths, err := sc.pathsToNode(pfn.(*filenode.FileNode), "", observedPaths) + if err != nil { + return nil, err + } + + addPath(suffix, linkSrcPaths.List()...) 
+ } + + // crawl up the tree to find all paths to our parent and repeat + return paths, sc.pathsToParents(paths) +} + +func (sc searchContext) pathsToParents(paths file.PathSet) error { + for p := range paths.Enumerate() { + nextNestedSuffix := p.Basename() + allParentPaths := p.ConstituentPaths() + sort.Sort(sort.Reverse(file.Paths(allParentPaths))) + for _, pp := range allParentPaths { + if pp == "/" { + break + } + + nestedSuffix := nextNestedSuffix + nextNestedSuffix = path.Join(pp.Basename(), nestedSuffix) + + pna, err := sc.tree.node(pp, linkResolutionStrategy{ + FollowAncestorLinks: true, + FollowBasenameLinks: false, + }) + if err != nil { + return fmt.Errorf("unable to get parent node for path=%q: %w", pp, err) + } + + if !pna.HasFileNode() { + continue + } + + parentLinkPaths, err := sc.pathsToNode(pna.FileNode, nestedSuffix, nil) + if err != nil { + return err + } + paths.Merge(parentLinkPaths) + } + } + return nil +} + +func (sc searchContext) fileNodesInTree(fileEntries []IndexEntry) ([]*filenode.FileNode, error) { + var nodes []*filenode.FileNode allFileEntries: for _, entry := range fileEntries { - _, ref, err := i.tree.File(entry.Reference.RealPath, FollowBasenameLinks) + // note: it is important that we don't enable any basename link resolution + na, err := sc.tree.file(entry.Reference.RealPath) if err != nil { return nil, fmt.Errorf("unable to get ref for path=%q: %w", entry.Reference.RealPath, err) } - if !ref.HasReference() { + if !na.HasFileNode() { continue } - for _, accessRef := range ref.ResolutionReferences() { - if accessRef.ID() == entry.Reference.ID() { - // we know this entry exists in the tree, keep track of the reference for this file - refs = append(refs, *ref) - continue allFileEntries - } + // only check the resolved node matches the index entries reference, not via link resolutions... 
+ if na.FileNode.Reference != nil && na.FileNode.Reference.ID() == entry.Reference.ID() { + nodes = append(nodes, na.FileNode) + continue allFileEntries } // we did not find a matching file ID in the tree, so drop this entry } + return nodes, nil +} + +// referencesInTree does two things relative to the index entries given: +// 1) it expands the index entries to include all possible access paths to the file node (by considering all possible link resolutions) +// 2) it filters the index entries to only include those that exist in the tree +func (sc searchContext) referencesInTree(fileEntries []IndexEntry) ([]file.ReferenceAccessVia, error) { + var refs []file.ReferenceAccessVia + + for _, entry := range fileEntries { + na, err := sc.tree.file(entry.Reference.RealPath, FollowBasenameLinks) + if err != nil { + return nil, fmt.Errorf("unable to get ref for path=%q: %w", entry.Reference.RealPath, err) + } + + // this filters out any index entries that do not exist in the tree + if !na.HasFileNode() { + continue + } + + // expand the index results with more possible access paths from the link resolution cache + var expandedRefs []file.ReferenceAccessVia + allPathsToNode, err := sc.allPathsToNode(na.FileNode) + if err != nil { + return nil, fmt.Errorf("unable to get all paths to node for path=%q: %w", entry.Reference.RealPath, err) + } + for _, p := range allPathsToNode { + _, ref, err := sc.tree.File(p, FollowBasenameLinks) + if err != nil { + return nil, fmt.Errorf("unable to get ref for path=%q: %w", p, err) + } + if !ref.HasReference() { + continue + } + expandedRefs = append(expandedRefs, *ref) + } + + for _, ref := range expandedRefs { + for _, accessRef := range ref.ResolutionReferences() { + if accessRef.ID() == entry.Reference.ID() { + // we know this entry exists in the tree, keep track of the reference for this file + refs = append(refs, ref) + } + } + } + } return refs, nil } diff --git a/pkg/filetree/search_test.go b/pkg/filetree/search_test.go index 
6f79f1d5..072e46a9 100644 --- a/pkg/filetree/search_test.go +++ b/pkg/filetree/search_test.go @@ -3,8 +3,10 @@ package filetree import ( "fmt" "github.com/anchore/stereoscope/pkg/file" + "github.com/anchore/stereoscope/pkg/filetree/filenode" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "testing" ) @@ -100,16 +102,27 @@ func Test_searchContext_SearchByGlob(t *testing.T) { } tree := NewFileTree() - ref, err := tree.AddSymLink("/link-to-path", "/path") + doubleLinkToPathRef, err := tree.AddSymLink("/double-link-to-path", "/link-to-path") require.NoError(t, err) - require.NotNil(t, ref) + require.NotNil(t, doubleLinkToPathRef) - ref, err = tree.AddFile("/path/to/file.txt") + linkToPathRef, err := tree.AddSymLink("/link-to-path", "/path") require.NoError(t, err) - require.NotNil(t, ref) + require.NotNil(t, linkToPathRef) + + linkToFileRef, err := tree.AddSymLink("/link-to-file", "/path/to/file.txt") + require.NoError(t, err) + require.NotNil(t, linkToFileRef) + + fileRef, err := tree.AddFile("/path/to/file.txt") + require.NoError(t, err) + require.NotNil(t, fileRef) idx := NewIndex() - idx.Add(*ref, file.Metadata{MIMEType: "plain/text"}) + idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeReg}) + idx.Add(*linkToFileRef, file.Metadata{Type: file.TypeSymlink}) + idx.Add(*linkToPathRef, file.Metadata{Type: file.TypeSymlink}) + idx.Add(*doubleLinkToPathRef, file.Metadata{Type: file.TypeSymlink}) defaultFields := fields{ tree: tree, @@ -130,6 +143,38 @@ func Test_searchContext_SearchByGlob(t *testing.T) { glob: "/**/t?/fil?.txt", }, want: []file.ReferenceAccessVia{ + { + ReferenceAccess: file.ReferenceAccess{ + RequestPath: "/double-link-to-path/to/file.txt", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", + }, + }, + }, + { + ReferenceAccess: file.ReferenceAccess{ + RequestPath: "/link-to-file", + Reference: &file.Reference{ + RealPath: 
"/path/to/file.txt", + }, + }, + LeafLinkResolution: []file.ReferenceAccess{ + { + RequestPath: "/link-to-file", + Reference: &file.Reference{ + RealPath: "/link-to-file", + }, + }, + }, + }, + { + ReferenceAccess: file.ReferenceAccess{ + RequestPath: "/link-to-path/to/file.txt", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", + }, + }, + }, { ReferenceAccess: file.ReferenceAccess{ RequestPath: "/path/to/file.txt", @@ -139,8 +184,74 @@ func Test_searchContext_SearchByGlob(t *testing.T) { }, }, }, - }, { - name: "virtual path exists", + }, + { + name: "ancestor access path exists", + fields: defaultFields, + args: args{ + // note: this is a glob through a symlink (ancestor). If not using the index, this will work + // just fine, since we do a full tree search. However, if using the index, this shortcut will + // dodge any ancestor symlink and will not find the file. + glob: "**/link-to-path/to/file.txt", + }, + want: []file.ReferenceAccessVia{ + { + ReferenceAccess: file.ReferenceAccess{ + RequestPath: "/link-to-path/to/file.txt", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", + }, + }, + }, + }, + }, + { + name: "multi ancestor access path exists", + fields: defaultFields, + args: args{ + // note: this is a glob through a symlink (ancestor). If not using the index, this will work + // just fine, since we do a full tree search. However, if using the index, this shortcut will + // dodge any ancestor symlink and will not find the file. 
+ glob: "**/double-link-to-path/to/file.txt", + }, + want: []file.ReferenceAccessVia{ + { + ReferenceAccess: file.ReferenceAccess{ + RequestPath: "/double-link-to-path/to/file.txt", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", + }, + }, + }, + }, + }, + { + name: "leaf access path exists", + fields: defaultFields, + args: args{ + glob: "**/link-to-file", + }, + want: []file.ReferenceAccessVia{ + { + ReferenceAccess: file.ReferenceAccess{ + RequestPath: "/link-to-file", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", + }, + }, + LeafLinkResolution: []file.ReferenceAccess{ + { + RequestPath: "/link-to-file", + Reference: &file.Reference{ + RealPath: "/link-to-file", + }, + }, + }, + }, + }, + }, + { + name: "ancestor access path exists (virtual path)", fields: defaultFields, args: args{ // note: this is a glob through a symlink (ancestor). If not using the index, this will work @@ -173,11 +284,8 @@ func Test_searchContext_SearchByGlob(t *testing.T) { if tt.wantErr == nil { tt.wantErr = require.NoError } - i := searchContext{ - tree: tt.fields.tree, - index: tt.fields.index, - } - got, err := i.SearchByGlob(tt.args.glob, tt.args.options...) + sc := NewSearchContext(tt.fields.tree, tt.fields.index) + got, err := sc.SearchByGlob(tt.args.glob, tt.args.options...)
tt.wantErr(t, err, fmt.Sprintf("SearchByGlob(%v, %v)", tt.args.glob, tt.args.options)) if err != nil { return @@ -274,3 +382,473 @@ func Test_searchContext_SearchByMIMEType(t *testing.T) { }) } } + +func Test_searchContext_allPathsToNode(t *testing.T) { + type input struct { + query *filenode.FileNode + sc *searchContext + } + + tests := []struct { + name string + input input + want []file.Path + wantErr require.ErrorAssertionFunc + }{ + { + name: "dead symlink", + want: []file.Path{ + "/path/to/file.txt", + }, + input: func() input { + tree := NewFileTree() + + deadLinkRef, err := tree.AddSymLink("/link-to-file", "/path/to/dead/file.txt") + require.NoError(t, err) + require.NotNil(t, deadLinkRef) + + fileRef, err := tree.AddFile("/path/to/file.txt") + require.NoError(t, err) + require.NotNil(t, fileRef) + + idx := NewIndex() + idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeReg}) + idx.Add(*deadLinkRef, file.Metadata{Type: file.TypeSymlink}) + + na, err := tree.node(fileRef.RealPath, linkResolutionStrategy{ + FollowAncestorLinks: false, + FollowBasenameLinks: false, + DoNotFollowDeadBasenameLinks: false, + }) + require.NoError(t, err) + require.NotNil(t, na) + require.NotNil(t, na.FileNode) + require.Equalf(t, fileRef.ID(), na.FileNode.Reference.ID(), "query node should be the same as the file node") + + return input{ + query: na.FileNode, + sc: NewSearchContext(tree, idx).(*searchContext), + } + }(), + }, + { + name: "symlink triangle cycle", + wantErr: require.Error, + input: func() input { + tree := NewFileTree() + + link1, err := tree.AddSymLink("/1", "/2") + require.NoError(t, err) + require.NotNil(t, link1) + + link2, err := tree.AddSymLink("/2", "/3") + require.NoError(t, err) + require.NotNil(t, link2) + + link3, err := tree.AddSymLink("/3", "/1") + require.NoError(t, err) + require.NotNil(t, link3) + + idx := NewIndex() + idx.Add(*link1, file.Metadata{Type: file.TypeSymlink}) + idx.Add(*link2, file.Metadata{Type: file.TypeSymlink})
+ idx.Add(*link3, file.Metadata{Type: file.TypeSymlink}) + + na, err := tree.node(link1.RealPath, linkResolutionStrategy{ + FollowAncestorLinks: false, + FollowBasenameLinks: false, + DoNotFollowDeadBasenameLinks: false, + }) + require.NoError(t, err) + require.NotNil(t, na) + require.NotNil(t, na.FileNode) + require.Equalf(t, link1.ID(), na.FileNode.Reference.ID(), "query node should be the same as the first link") + + return input{ + query: na.FileNode, + sc: NewSearchContext(tree, idx).(*searchContext), + } + }(), + }, + { + name: "single leaf symlink", + want: []file.Path{ + "/link-to-file", + "/path/to/file.txt", + }, + input: func() input { + tree := NewFileTree() + + linkToFileRef, err := tree.AddSymLink("/link-to-file", "/path/to/file.txt") + require.NoError(t, err) + require.NotNil(t, linkToFileRef) + + fileRef, err := tree.AddFile("/path/to/file.txt") + require.NoError(t, err) + require.NotNil(t, fileRef) + + idx := NewIndex() + idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeReg}) + idx.Add(*linkToFileRef, file.Metadata{Type: file.TypeSymlink}) + + na, err := tree.node(fileRef.RealPath, linkResolutionStrategy{ + FollowAncestorLinks: false, + FollowBasenameLinks: false, + DoNotFollowDeadBasenameLinks: false, + }) + require.NoError(t, err) + require.NotNil(t, na) + require.NotNil(t, na.FileNode) + require.Equalf(t, fileRef.ID(), na.FileNode.Reference.ID(), "query node should be the same as the file node") + + return input{ + query: na.FileNode, + sc: NewSearchContext(tree, idx).(*searchContext), + } + }(), + }, + { + name: "2 deep leaf symlink", + want: []file.Path{ + "/double-link-to-file", + "/link-to-file", + "/path/to/file.txt", + }, + input: func() input { + tree := NewFileTree() + + doubleLinkToFileRef, err := tree.AddSymLink("/double-link-to-file", "/link-to-file") + require.NoError(t, err) + require.NotNil(t, doubleLinkToFileRef) + + linkToFileRef, err := tree.AddSymLink("/link-to-file", "/path/to/file.txt") + 
require.NoError(t, err) + require.NotNil(t, linkToFileRef) + + fileRef, err := tree.AddFile("/path/to/file.txt") + require.NoError(t, err) + require.NotNil(t, fileRef) + + idx := NewIndex() + idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeReg}) + idx.Add(*linkToFileRef, file.Metadata{Type: file.TypeSymlink}) + idx.Add(*doubleLinkToFileRef, file.Metadata{Type: file.TypeSymlink}) + + na, err := tree.node(fileRef.RealPath, linkResolutionStrategy{ + FollowAncestorLinks: false, + FollowBasenameLinks: false, + DoNotFollowDeadBasenameLinks: false, + }) + require.NoError(t, err) + require.NotNil(t, na) + require.NotNil(t, na.FileNode) + require.Equalf(t, fileRef.ID(), na.FileNode.Reference.ID(), "query node should be the same as the file node") + + return input{ + query: na.FileNode, + sc: NewSearchContext(tree, idx).(*searchContext), + } + }(), + }, + { + name: "single ancestor symlink", + want: []file.Path{ + "/link-to-to/file.txt", + "/path/to/file.txt", + }, + input: func() input { + tree := NewFileTree() + + dirTo, err := tree.AddDir("/path/to") + require.NoError(t, err) + require.NotNil(t, dirTo) + + linkToToRef, err := tree.AddSymLink("/link-to-to", "/path/to") + require.NoError(t, err) + require.NotNil(t, linkToToRef) + + fileRef, err := tree.AddFile("/path/to/file.txt") + require.NoError(t, err) + require.NotNil(t, fileRef) + + idx := NewIndex() + idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeReg}) + idx.Add(*linkToToRef, file.Metadata{Type: file.TypeSymlink}) + idx.Add(*dirTo, file.Metadata{Type: file.TypeDir}) + + na, err := tree.node(fileRef.RealPath, linkResolutionStrategy{ + FollowAncestorLinks: false, + FollowBasenameLinks: false, + DoNotFollowDeadBasenameLinks: false, + }) + require.NoError(t, err) + require.NotNil(t, na) + require.NotNil(t, na.FileNode) + require.Equalf(t, fileRef.ID(), na.FileNode.Reference.ID(), "query node should be the same as the file node") + + return input{ + query: na.FileNode, + sc: 
NewSearchContext(tree, idx).(*searchContext), + } + }(), + }, + { + name: "2 deep, single sibling ancestor symlink", + want: []file.Path{ + "/link-to-path/to/file.txt", + "/link-to-to/file.txt", + "/path/to/file.txt", + }, + input: func() input { + tree := NewFileTree() + + dirTo, err := tree.AddDir("/path/to") + require.NoError(t, err) + require.NotNil(t, dirTo) + + linkToPathRef, err := tree.AddSymLink("/link-to-path", "/path") + require.NoError(t, err) + require.NotNil(t, linkToPathRef) + + linkToToRef, err := tree.AddSymLink("/link-to-to", "/path/to") + require.NoError(t, err) + require.NotNil(t, linkToToRef) + + fileRef, err := tree.AddFile("/path/to/file.txt") + require.NoError(t, err) + require.NotNil(t, fileRef) + + idx := NewIndex() + idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeReg}) + idx.Add(*linkToToRef, file.Metadata{Type: file.TypeSymlink}) + idx.Add(*linkToPathRef, file.Metadata{Type: file.TypeSymlink}) + idx.Add(*dirTo, file.Metadata{Type: file.TypeDir}) + + na, err := tree.node(fileRef.RealPath, linkResolutionStrategy{ + FollowAncestorLinks: false, + FollowBasenameLinks: false, + DoNotFollowDeadBasenameLinks: false, + }) + require.NoError(t, err) + require.NotNil(t, na) + require.NotNil(t, na.FileNode) + require.Equalf(t, fileRef.ID(), na.FileNode.Reference.ID(), "query node should be the same as the file node") + + return input{ + query: na.FileNode, + sc: NewSearchContext(tree, idx).(*searchContext), + } + }(), + }, + { + name: "2 deep, multiple sibling ancestor symlink", + want: []file.Path{ + "/another-link-to-path/to/file.txt", + "/another-link-to-to/file.txt", + "/link-to-path/to/file.txt", + "/link-to-to/file.txt", + "/path/to/file.txt", + }, + input: func() input { + tree := NewFileTree() + + dirTo, err := tree.AddDir("/path/to") + require.NoError(t, err) + require.NotNil(t, dirTo) + + linkToPathRef, err := tree.AddSymLink("/link-to-path", "/path") + require.NoError(t, err) + require.NotNil(t, linkToPathRef) + + 
anotherLinkToPathRef, err := tree.AddSymLink("/another-link-to-path", "/path") + require.NoError(t, err) + require.NotNil(t, anotherLinkToPathRef) + + linkToToRef, err := tree.AddSymLink("/link-to-to", "/path/to") + require.NoError(t, err) + require.NotNil(t, linkToToRef) + + anotherLinkToToRef, err := tree.AddSymLink("/another-link-to-to", "/path/to") + require.NoError(t, err) + require.NotNil(t, anotherLinkToToRef) + + fileRef, err := tree.AddFile("/path/to/file.txt") + require.NoError(t, err) + require.NotNil(t, fileRef) + + idx := NewIndex() + idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeReg}) + idx.Add(*linkToToRef, file.Metadata{Type: file.TypeSymlink}) + idx.Add(*linkToPathRef, file.Metadata{Type: file.TypeSymlink}) + idx.Add(*anotherLinkToPathRef, file.Metadata{Type: file.TypeSymlink}) + idx.Add(*anotherLinkToToRef, file.Metadata{Type: file.TypeSymlink}) + idx.Add(*dirTo, file.Metadata{Type: file.TypeDir}) + + na, err := tree.node(fileRef.RealPath, linkResolutionStrategy{ + FollowAncestorLinks: false, + FollowBasenameLinks: false, + DoNotFollowDeadBasenameLinks: false, + }) + require.NoError(t, err) + require.NotNil(t, na) + require.NotNil(t, na.FileNode) + require.Equalf(t, fileRef.ID(), na.FileNode.Reference.ID(), "query node should be the same as the file node") + + return input{ + query: na.FileNode, + sc: NewSearchContext(tree, idx).(*searchContext), + } + }(), + }, + { + name: "2 deep, multiple nested ancestor symlink", + want: []file.Path{ + "/link-to-path/link-to-another/file.txt", + "/link-to-path/to/another/file.txt", + "/link-to-path/to/link-to-file", + "/link-to-to/another/file.txt", + "/link-to-to/link-to-file", + "/path/link-to-another/file.txt", + "/path/to/another/file.txt", + "/path/to/link-to-file", + }, + input: func() input { + tree := NewFileTree() + + linkToAnotherViaLinkRef, err := tree.AddSymLink("/path/link-to-another", "/link-to-to/another") + require.NoError(t, err) + require.NotNil(t, 
linkToAnotherViaLinkRef) + + linkToPathRef, err := tree.AddSymLink("/link-to-path", "/path") + require.NoError(t, err) + require.NotNil(t, linkToPathRef) + + linkToToRef, err := tree.AddSymLink("/link-to-to", "/path/to") + require.NoError(t, err) + require.NotNil(t, linkToToRef) + + pathToLinkToFileRef, err := tree.AddSymLink("/path/to/link-to-file", "/path/to/another/file.txt") + require.NoError(t, err) + require.NotNil(t, pathToLinkToFileRef) + + dirTo, err := tree.AddDir("/path/to") + require.NoError(t, err) + require.NotNil(t, dirTo) + + dirAnother, err := tree.AddDir("/path/to/another") + require.NoError(t, err) + require.NotNil(t, dirAnother) + + fileRef, err := tree.AddFile("/path/to/another/file.txt") + require.NoError(t, err) + require.NotNil(t, fileRef) + + idx := NewIndex() + idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeReg}) + idx.Add(*linkToAnotherViaLinkRef, file.Metadata{Type: file.TypeSymlink}) + idx.Add(*linkToPathRef, file.Metadata{Type: file.TypeSymlink}) + idx.Add(*linkToToRef, file.Metadata{Type: file.TypeSymlink}) + idx.Add(*pathToLinkToFileRef, file.Metadata{Type: file.TypeSymlink}) + idx.Add(*dirTo, file.Metadata{Type: file.TypeDir}) + idx.Add(*dirAnother, file.Metadata{Type: file.TypeDir}) + + na, err := tree.node(fileRef.RealPath, linkResolutionStrategy{ + FollowAncestorLinks: false, + FollowBasenameLinks: false, + DoNotFollowDeadBasenameLinks: false, + }) + require.NoError(t, err) + require.NotNil(t, na) + require.NotNil(t, na.FileNode) + require.Equalf(t, fileRef.ID(), na.FileNode.Reference.ID(), "query node should be the same as the file node") + + return input{ + query: na.FileNode, + sc: NewSearchContext(tree, idx).(*searchContext), + } + }(), + }, + { + name: "relative, 2 deep, multiple nested ancestor symlink", + want: []file.Path{ + "/link-to-path/link-to-another/file.txt", + "/link-to-path/to/another/file.txt", + "/link-to-path/to/link-to-file", + "/link-to-to/another/file.txt", + 
"/link-to-to/link-to-file", + "/path/link-to-another/file.txt", + "/path/to/another/file.txt", + "/path/to/link-to-file", + }, + input: func() input { + tree := NewFileTree() + + linkToAnotherViaLinkRef, err := tree.AddSymLink("/path/link-to-another", "../link-to-to/another") + require.NoError(t, err) + require.NotNil(t, linkToAnotherViaLinkRef) + + linkToPathRef, err := tree.AddSymLink("/link-to-path", "./path") + require.NoError(t, err) + require.NotNil(t, linkToPathRef) + + linkToToRef, err := tree.AddSymLink("/link-to-to", "./path/to") + require.NoError(t, err) + require.NotNil(t, linkToToRef) + + pathToLinkToFileRef, err := tree.AddSymLink("/path/to/link-to-file", "../to/another/file.txt") + require.NoError(t, err) + require.NotNil(t, pathToLinkToFileRef) + + dirTo, err := tree.AddDir("/path/to") + require.NoError(t, err) + require.NotNil(t, dirTo) + + dirAnother, err := tree.AddDir("/path/to/another") + require.NoError(t, err) + require.NotNil(t, dirAnother) + + fileRef, err := tree.AddFile("/path/to/another/file.txt") + require.NoError(t, err) + require.NotNil(t, fileRef) + + idx := NewIndex() + idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeReg}) + idx.Add(*linkToAnotherViaLinkRef, file.Metadata{Type: file.TypeSymlink}) + idx.Add(*linkToPathRef, file.Metadata{Type: file.TypeSymlink}) + idx.Add(*linkToToRef, file.Metadata{Type: file.TypeSymlink}) + idx.Add(*pathToLinkToFileRef, file.Metadata{Type: file.TypeSymlink}) + idx.Add(*dirTo, file.Metadata{Type: file.TypeDir}) + idx.Add(*dirAnother, file.Metadata{Type: file.TypeDir}) + + na, err := tree.node(fileRef.RealPath, linkResolutionStrategy{ + FollowAncestorLinks: false, + FollowBasenameLinks: false, + DoNotFollowDeadBasenameLinks: false, + }) + require.NoError(t, err) + require.NotNil(t, na) + require.NotNil(t, na.FileNode) + require.Equalf(t, fileRef.ID(), na.FileNode.Reference.ID(), "query node should be the same as the file node") + + return input{ + query: na.FileNode, + sc: 
NewSearchContext(tree, idx).(*searchContext), + } + }(), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = require.NoError + } + + got, err := tt.input.sc.allPathsToNode(tt.input.query) + tt.wantErr(t, err, fmt.Sprintf("allPathsToNode(%v)", tt.input.query)) + if err != nil { + return + } + assert.ElementsMatchf(t, tt.want, got, cmp.Diff(tt.want, got)) + }) + } +} diff --git a/pkg/tree/depth_first_walker.go b/pkg/tree/depth_first_walker.go index 6870de1d..0efd677d 100644 --- a/pkg/tree/depth_first_walker.go +++ b/pkg/tree/depth_first_walker.go @@ -27,7 +27,7 @@ type DepthFirstWalker struct { visitor NodeVisitor tree Reader stack node.Stack - visited node.Set + visited node.IDSet conditions WalkConditions } diff --git a/pkg/tree/node/id.go b/pkg/tree/node/id.go index b84d2c09..706a8bf5 100644 --- a/pkg/tree/node/id.go +++ b/pkg/tree/node/id.go @@ -2,21 +2,67 @@ package node type ID string -type Set map[ID]struct{} +type IDSet map[ID]struct{} -func NewIDSet() Set { - return make(Set) +func NewIDSet() IDSet { + return make(IDSet) } -func (s Set) Add(i ID) { - s[i] = struct{}{} +func (s IDSet) Merge(other IDSet) { + for i := range other.Enumerate() { + s.Add(i) + } } -func (s Set) Remove(i ID) { - delete(s, i) +func (s IDSet) Add(ids ...ID) { + for _, i := range ids { + s[i] = struct{}{} + } } -func (s Set) Contains(i ID) bool { +func (s IDSet) Remove(ids ...ID) { + for _, i := range ids { + delete(s, i) + } +} + +func (s IDSet) Contains(i ID) bool { _, ok := s[i] return ok } + +func (s IDSet) Clear() { + // TODO: replace this with the new 'clear' keyword when it's available in go 1.20 or 1.21 + for i := range s { + delete(s, i) + } +} + +func (s IDSet) List() []ID { + ret := make([]ID, 0, len(s)) + for i := range s { + ret = append(ret, i) + } + return ret +} + +func (s IDSet) Enumerate() <-chan ID { + ret := make(chan ID) + go func() { + defer close(ret) + for i := range s { + ret <- i + } + }() + 
return ret +} + +func (s IDSet) ContainsAny(ids ...ID) bool { + for _, i := range ids { + _, ok := s[i] + if ok { + return true + } + } + return false +} diff --git a/pkg/tree/tree.go b/pkg/tree/tree.go index 1b5079fd..0dc04557 100644 --- a/pkg/tree/tree.go +++ b/pkg/tree/tree.go @@ -8,9 +8,9 @@ import ( // Tree represents a simple Tree data structure. type Tree struct { - nodes map[node.ID]node.Node - children map[node.ID]map[node.ID]node.Node - parent map[node.ID]node.Node + nodes map[node.ID]node.Node // {node-id: node} + children map[node.ID]map[node.ID]node.Node // {parent-id: {child-id: child-node} + parent map[node.ID]node.Node // {child-id: parent-node} } // NewTree returns an instance of a Tree. From 1cf1864552538f67a4082d83ce64b85edb830d15 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Wed, 1 Feb 2023 13:57:19 -0500 Subject: [PATCH 12/35] allow index symlink resolution to function through cycles Signed-off-by: Alex Goodman --- pkg/file/id.go | 13 +-- pkg/file/path_set.go | 13 +-- pkg/filetree/glob.go | 38 ++++---- pkg/filetree/search.go | 71 ++++++++++----- pkg/filetree/search_test.go | 93 +++++++++++++++++++- pkg/image/image.go | 15 ++-- pkg/image/layer.go | 18 ++-- pkg/tree/node/id.go | 13 +-- test/integration/mime_type_detection_test.go | 2 +- 9 files changed, 181 insertions(+), 95 deletions(-) diff --git a/pkg/file/id.go b/pkg/file/id.go index a6366bcf..4f978533 100644 --- a/pkg/file/id.go +++ b/pkg/file/id.go @@ -32,7 +32,7 @@ func (s IDSet) Size() int { } func (s IDSet) Merge(other IDSet) { - for i := range other.Enumerate() { + for _, i := range other.List() { s.Add(i) } } @@ -72,17 +72,6 @@ func (s IDSet) List() []ID { return ret } -func (s IDSet) Enumerate() <-chan ID { - ret := make(chan ID) - go func() { - defer close(ret) - for i := range s { - ret <- i - } - }() - return ret -} - func (s IDSet) ContainsAny(ids ...ID) bool { for _, i := range ids { _, ok := s[i] diff --git a/pkg/file/path_set.go b/pkg/file/path_set.go index 
fedc3e07..97df52ac 100644 --- a/pkg/file/path_set.go +++ b/pkg/file/path_set.go @@ -11,7 +11,7 @@ func (s PathSet) Size() int { } func (s PathSet) Merge(other PathSet) { - for i := range other.Enumerate() { + for _, i := range other.List() { s.Add(i) } } @@ -48,17 +48,6 @@ func (s PathSet) List() []Path { return ret } -func (s PathSet) Enumerate() <-chan Path { - ret := make(chan Path) - go func() { - defer close(ret) - for i := range s { - ret <- i - } - }() - return ret -} - func (s PathSet) ContainsAny(ids ...Path) bool { for _, i := range ids { _, ok := s[i] diff --git a/pkg/filetree/glob.go b/pkg/filetree/glob.go index a68fba2e..06e9d204 100644 --- a/pkg/filetree/glob.go +++ b/pkg/filetree/glob.go @@ -45,14 +45,16 @@ func isInPathResolutionLoop(path string, ft *FileTree) (bool, error) { allPathSet := file.NewPathSet() allPaths := file.Path(path).AllPaths() for _, p := range allPaths { - fn, err := ft.node(p, linkResolutionStrategy{ + fna, err := ft.node(p, linkResolutionStrategy{ FollowAncestorLinks: true, FollowBasenameLinks: true, }) if err != nil { return false, err } - allPathSet.Add(file.Path(fn.FileNode.ID())) + if fna.HasFileNode() { + allPathSet.Add(file.Path(fna.FileNode.ID())) + } } // we want to allow for getting children out of the first iteration of a infinite path, but NOT allowing // beyond the second iteration down an infinite path. 
@@ -86,23 +88,23 @@ func (f *fileAdapter) ReadDir(n int) ([]fs.DirEntry, error) { return nil, os.ErrInvalid } var ret = make([]fs.DirEntry, 0) - fn, err := f.filetree.node(file.Path(f.name), linkResolutionStrategy{ + fna, err := f.filetree.node(file.Path(f.name), linkResolutionStrategy{ FollowAncestorLinks: true, FollowBasenameLinks: true, }) if err != nil { return ret, err } - if fn == nil { + if !fna.HasFileNode() { return ret, nil } - isInPathResolutionLoop, err := isInPathResolutionLoop(f.name, f.filetree) - if err != nil || isInPathResolutionLoop { + isInLoop, err := isInPathResolutionLoop(f.name, f.filetree) + if err != nil || isInLoop { return ret, err } - for idx, child := range f.filetree.tree.Children(fn.FileNode) { + for idx, child := range f.filetree.tree.Children(fna.FileNode) { if idx == n && n != -1 { break } @@ -125,23 +127,23 @@ type osAdapter struct { func (a *osAdapter) ReadDir(name string) ([]fs.DirEntry, error) { var ret = make([]fs.DirEntry, 0) - fn, err := a.filetree.node(file.Path(name), linkResolutionStrategy{ + fna, err := a.filetree.node(file.Path(name), linkResolutionStrategy{ FollowAncestorLinks: true, FollowBasenameLinks: true, }) if err != nil { return ret, err } - if fn == nil { + if !fna.HasFileNode() { return ret, nil } - isInPathResolutionLoop, err := isInPathResolutionLoop(name, a.filetree) - if err != nil || isInPathResolutionLoop { + isInLoop, err := isInPathResolutionLoop(name, a.filetree) + if err != nil || isInLoop { return ret, err } - for _, child := range a.filetree.tree.Children(fn.FileNode) { + for _, child := range a.filetree.tree.Children(fna.FileNode) { requestPath := path.Join(name, filepath.Base(string(child.ID()))) r, err := a.Lstat(requestPath) if err == nil { @@ -157,7 +159,7 @@ func (a *osAdapter) ReadDir(name string) ([]fs.DirEntry, error) { // Lstat returns a FileInfo describing the named file. If the file is a symbolic link, the returned // FileInfo describes the symbolic link. 
Lstat makes no attempt to follow the link. func (a *osAdapter) Lstat(name string) (fs.FileInfo, error) { - fn, err := a.filetree.node(file.Path(name), linkResolutionStrategy{ + fna, err := a.filetree.node(file.Path(name), linkResolutionStrategy{ FollowAncestorLinks: true, // Lstat by definition requires that basename symlinks are not followed FollowBasenameLinks: false, @@ -166,13 +168,13 @@ func (a *osAdapter) Lstat(name string) (fs.FileInfo, error) { if err != nil { return &fileinfoAdapter{}, err } - if fn == nil || fn.FileNode == nil { + if !fna.HasFileNode() { return &fileinfoAdapter{}, os.ErrNotExist } return &fileinfoAdapter{ VirtualPath: file.Path(name), - Node: *fn.FileNode, + Node: *fna.FileNode, }, nil } @@ -187,7 +189,7 @@ func (a *osAdapter) Open(name string) (fs.File, error) { // Stat returns a FileInfo describing the named file. func (a *osAdapter) Stat(name string) (fs.FileInfo, error) { - fn, err := a.filetree.node(file.Path(name), linkResolutionStrategy{ + fna, err := a.filetree.node(file.Path(name), linkResolutionStrategy{ FollowAncestorLinks: true, FollowBasenameLinks: true, DoNotFollowDeadBasenameLinks: a.doNotFollowDeadBasenameLinks, @@ -195,12 +197,12 @@ func (a *osAdapter) Stat(name string) (fs.FileInfo, error) { if err != nil { return &fileinfoAdapter{}, err } - if fn == nil || fn.FileNode == nil { + if !fna.HasFileNode() { return &fileinfoAdapter{}, os.ErrNotExist } return &fileinfoAdapter{ VirtualPath: file.Path(name), - Node: *fn.FileNode, + Node: *fna.FileNode, }, nil } diff --git a/pkg/filetree/search.go b/pkg/filetree/search.go index c94002dc..0ce4070c 100644 --- a/pkg/filetree/search.go +++ b/pkg/filetree/search.go @@ -219,6 +219,15 @@ func (sc searchContext) referencesWithRequirement(requirement string, entries [] return results, nil } +type cacheRequest struct { + RealPath file.Path +} + +type cacheResult struct { + Paths file.PathSet + Error error +} + func (sc searchContext) allPathsToNode(fn *filenode.FileNode) ([]file.Path, 
error) { if fn == nil { return nil, nil @@ -226,7 +235,9 @@ func (sc searchContext) allPathsToNode(fn *filenode.FileNode) ([]file.Path, erro observedPaths := file.NewPathSet() - paths, err := sc.pathsToNode(fn, "", observedPaths) + cache := map[cacheRequest]cacheResult{} + + paths, err := sc.pathsToNode(fn, observedPaths, cache) if err != nil { return nil, err } @@ -239,19 +250,42 @@ func (sc searchContext) allPathsToNode(fn *filenode.FileNode) ([]file.Path, erro return pathsList, nil } -func (sc searchContext) pathsToNode(fn *filenode.FileNode, suffix string, observedPaths file.PathSet) (file.PathSet, error) { +func (sc searchContext) pathsToNode(fn *filenode.FileNode, observedPaths file.PathSet, cache map[cacheRequest]cacheResult) (file.PathSet, error) { + req := cacheRequest{ + RealPath: fn.RealPath, + } + + if result, ok := cache[req]; ok { + return result.Paths, result.Error + } + + paths, err := sc._pathsToNode(fn, observedPaths, cache) + + cache[req] = cacheResult{ + Paths: paths, + Error: err, + } + + return paths, err +} + +// nolint: funlen +func (sc searchContext) _pathsToNode(fn *filenode.FileNode, observedPaths file.PathSet, cache map[cacheRequest]cacheResult) (file.PathSet, error) { if fn == nil { return nil, nil } + paths := file.NewPathSet() + paths.Add(fn.RealPath) + if observedPaths != nil { if observedPaths.Contains(fn.RealPath) { - return nil, fmt.Errorf("found circular reference to path=%q", fn.RealPath) + // we've already observed this path, so we can stop here + return nil, nil } observedPaths.Add(fn.RealPath) } - paths := file.NewPathSet() nodeID := fn.ID() addPath := func(suffix string, ps ...file.Path) { @@ -263,34 +297,27 @@ func (sc searchContext) pathsToNode(fn *filenode.FileNode, suffix string, observ } } - if suffix == "" { - addPath(suffix, fn.RealPath) - } - // add all paths to the node that are linked to it - for linkSrcID := range sc.linkBackwardRefs[nodeID].Enumerate() { + for _, linkSrcID := range 
sc.linkBackwardRefs[nodeID].List() { pfn := sc.tree.tree.Node(linkSrcID) if pfn == nil { log.WithFields("id", nodeID, "parent", linkSrcID).Warn("found non-existent parent link") continue } - linkSrcPaths, err := sc.pathsToNode(pfn.(*filenode.FileNode), "", observedPaths) + linkSrcPaths, err := sc.pathsToNode(pfn.(*filenode.FileNode), observedPaths, cache) if err != nil { return nil, err } - addPath(suffix, linkSrcPaths.List()...) + addPath("", linkSrcPaths.List()...) } // crawl up the tree to find all paths to our parent and repeat - return paths, sc.pathsToParents(paths) -} - -func (sc searchContext) pathsToParents(paths file.PathSet) error { - for p := range paths.Enumerate() { + for _, p := range paths.List() { nextNestedSuffix := p.Basename() allParentPaths := p.ConstituentPaths() sort.Sort(sort.Reverse(file.Paths(allParentPaths))) + for _, pp := range allParentPaths { if pp == "/" { break @@ -304,21 +331,23 @@ func (sc searchContext) pathsToParents(paths file.PathSet) error { FollowBasenameLinks: false, }) if err != nil { - return fmt.Errorf("unable to get parent node for path=%q: %w", pp, err) + return nil, fmt.Errorf("unable to get parent node for path=%q: %w", pp, err) } if !pna.HasFileNode() { continue } - parentLinkPaths, err := sc.pathsToNode(pna.FileNode, nestedSuffix, nil) + parentLinkPaths, err := sc.pathsToNode(pna.FileNode, observedPaths, cache) if err != nil { - return err + return nil, err } - paths.Merge(parentLinkPaths) + addPath(nestedSuffix, parentLinkPaths.List()...) 
} } - return nil + observedPaths.Remove(fn.RealPath) + + return paths, nil } func (sc searchContext) fileNodesInTree(fileEntries []IndexEntry) ([]*filenode.FileNode, error) { diff --git a/pkg/filetree/search_test.go b/pkg/filetree/search_test.go index 072e46a9..0d41a0a8 100644 --- a/pkg/filetree/search_test.go +++ b/pkg/filetree/search_test.go @@ -395,6 +395,37 @@ func Test_searchContext_allPathsToNode(t *testing.T) { want []file.Path wantErr require.ErrorAssertionFunc }{ + { + name: "simple dir", + want: []file.Path{ + "/path/to", + }, + input: func() input { + tree := NewFileTree() + + fileRef, err := tree.AddFile("/path/to/file.txt") + require.NoError(t, err) + require.NotNil(t, fileRef) + + idx := NewIndex() + idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeReg}) + + na, err := tree.node("/path/to", linkResolutionStrategy{ + FollowAncestorLinks: false, + FollowBasenameLinks: false, + DoNotFollowDeadBasenameLinks: false, + }) + require.NoError(t, err) + require.NotNil(t, na) + require.NotNil(t, na.FileNode) + require.Equal(t, file.Path("/path/to"), na.FileNode.RealPath) + + return input{ + query: na.FileNode, + sc: NewSearchContext(tree, idx).(*searchContext), + } + }(), + }, { name: "dead symlink", want: []file.Path{ @@ -432,8 +463,12 @@ func Test_searchContext_allPathsToNode(t *testing.T) { }(), }, { - name: "symlink triangle cycle", - wantErr: require.Error, + name: "symlink triangle cycle", + want: []file.Path{ + "/1", + "/2", + "/3", + }, input: func() input { tree := NewFileTree() @@ -470,6 +505,60 @@ func Test_searchContext_allPathsToNode(t *testing.T) { } }(), }, + { + // note: this isn't a real link cycle, but it does look like one while resolving from a leaf to the root + name: "reverse symlink cycle", + want: []file.Path{ + "/bin/ttyd", + "/usr/bin/X11/ttyd", + "/usr/bin/ttyd", + }, + input: func() input { + tree := NewFileTree() + + usrRef, err := tree.AddDir("/usr") + require.NoError(t, err) + require.NotNil(t, usrRef) + + 
usrBinRef, err := tree.AddDir("/usr/bin") + require.NoError(t, err) + require.NotNil(t, usrBinRef) + + ttydRef, err := tree.AddFile("/usr/bin/ttyd") + require.NoError(t, err) + require.NotNil(t, ttydRef) + + binLinkRef, err := tree.AddSymLink("/bin", "usr/bin") + require.NoError(t, err) + require.NotNil(t, binLinkRef) + + x11LinkRef, err := tree.AddSymLink("/usr/bin/X11", ".") + require.NoError(t, err) + require.NotNil(t, x11LinkRef) + + idx := NewIndex() + idx.Add(*usrRef, file.Metadata{Type: file.TypeDir}) + idx.Add(*usrBinRef, file.Metadata{Type: file.TypeDir}) + idx.Add(*binLinkRef, file.Metadata{Type: file.TypeSymlink}) + idx.Add(*x11LinkRef, file.Metadata{Type: file.TypeSymlink}) + idx.Add(*ttydRef, file.Metadata{Type: file.TypeReg}) + + na, err := tree.node(ttydRef.RealPath, linkResolutionStrategy{ + FollowAncestorLinks: false, + FollowBasenameLinks: false, + DoNotFollowDeadBasenameLinks: false, + }) + require.NoError(t, err) + require.NotNil(t, na) + require.NotNil(t, na.FileNode) + require.Equalf(t, ttydRef.ID(), na.FileNode.Reference.ID(), "query node should be the same as usr/bin/ttyd binary") + + return input{ + query: na.FileNode, + sc: NewSearchContext(tree, idx).(*searchContext), + } + }(), + }, { name: "single leaf symlink", want: []file.Path{ diff --git a/pkg/image/image.go b/pkg/image/image.go index 34a28658..3e917adf 100644 --- a/pkg/image/image.go +++ b/pkg/image/image.go @@ -32,6 +32,8 @@ type Image struct { // FileCatalog contains all file metadata for all files in all layers FileCatalog *FileCatalog + SquashedSearchContext filetree.Searcher + overrideMetadata []AdditionalMetadata } @@ -214,7 +216,11 @@ func (i *Image) Read() error { i.Layers = layers // in order to resolve symlinks all squashed trees must be available - return i.squash(readProg) + err = i.squash(readProg) + + i.SquashedSearchContext = filetree.NewSearchContext(i.SquashedTree(), i.FileCatalog.Index) + + return err } // squash generates a squash tree for each layer in the 
image. For instance, layer 2 squash = @@ -239,6 +245,7 @@ func (i *Image) squash(prog *progress.Manual) error { } layer.SquashedTree = squashedTree + layer.SquashedSearchContext = filetree.NewSearchContext(layer.SquashedTree, layer.fileCatalog.Index) lastSquashTree = squashedTree prog.N++ @@ -267,15 +274,11 @@ func (i *Image) FileContentsFromSquash(path file.Path) (io.ReadCloser, error) { return fetchFileContentsByPath(i.SquashedTree(), i.FileCatalog, path) } -func (i *Image) SquashedSearchContext() filetree.Searcher { - return filetree.NewSearchContext(i.SquashedTree(), i.FileCatalog.Index) -} - // FilesByMIMETypeFromSquash returns file references for files that match at least one of the given MIME types. // Deprecated: please use SquashedSearchContext().SearchByMIMEType() instead. func (i *Image) FilesByMIMETypeFromSquash(mimeTypes ...string) ([]file.Reference, error) { var refs []file.Reference - refVias, err := i.SquashedSearchContext().SearchByMIMEType(mimeTypes...) + refVias, err := i.SquashedSearchContext.SearchByMIMEType(mimeTypes...) if err != nil { return nil, err } diff --git a/pkg/image/layer.go b/pkg/image/layer.go index 63f2a5b0..fe2bb6ea 100644 --- a/pkg/image/layer.go +++ b/pkg/image/layer.go @@ -37,7 +37,9 @@ type Layer struct { // in lower layers relative to this one. SquashedTree *filetree.FileTree // fileCatalog contains all file metadata for all files in all layers (not just this layer) - fileCatalog *FileCatalog + fileCatalog *FileCatalog + SquashedSearchContext filetree.Searcher + SearchContext filetree.Searcher } // NewLayer provides a new, unread layer object. 
@@ -136,6 +138,8 @@ func (l *Layer) Read(catalog *FileCatalog, imgMetadata Metadata, idx int, uncomp return fmt.Errorf("unknown layer media type: %+v", l.Metadata.MediaType) } + l.SearchContext = filetree.NewSearchContext(l.Tree, l.fileCatalog.Index) + monitor.SetCompleted() return nil @@ -157,7 +161,7 @@ func (l *Layer) FileContentsFromSquash(path file.Path) (io.ReadCloser, error) { // Deprecated: use SearchContext().SearchByMIMEType() instead. func (l *Layer) FilesByMIMEType(mimeTypes ...string) ([]file.Reference, error) { var refs []file.Reference - refVias, err := l.SearchContext().SearchByMIMEType(mimeTypes...) + refVias, err := l.SearchContext.SearchByMIMEType(mimeTypes...) if err != nil { return nil, err } @@ -173,7 +177,7 @@ func (l *Layer) FilesByMIMEType(mimeTypes ...string) ([]file.Reference, error) { // Deprecated: use SquashedSearchContext().SearchByMIMEType() instead. func (l *Layer) FilesByMIMETypeFromSquash(mimeTypes ...string) ([]file.Reference, error) { var refs []file.Reference - refVias, err := l.SquashedSearchContext().SearchByMIMEType(mimeTypes...) + refVias, err := l.SquashedSearchContext.SearchByMIMEType(mimeTypes...) 
if err != nil { return nil, err } @@ -185,14 +189,6 @@ func (l *Layer) FilesByMIMETypeFromSquash(mimeTypes ...string) ([]file.Reference return refs, nil } -func (l *Layer) SearchContext() filetree.Searcher { - return filetree.NewSearchContext(l.Tree, l.fileCatalog.Index) -} - -func (l *Layer) SquashedSearchContext() filetree.Searcher { - return filetree.NewSearchContext(l.SquashedTree, l.fileCatalog.Index) -} - func layerTarIndexer(ft *filetree.FileTree, fileCatalog *FileCatalog, size *int64, layerRef *Layer, monitor *progress.Manual) file.TarIndexVisitor { return func(index file.TarIndexEntry) error { var err error diff --git a/pkg/tree/node/id.go b/pkg/tree/node/id.go index 706a8bf5..4509eabe 100644 --- a/pkg/tree/node/id.go +++ b/pkg/tree/node/id.go @@ -9,7 +9,7 @@ func NewIDSet() IDSet { } func (s IDSet) Merge(other IDSet) { - for i := range other.Enumerate() { + for _, i := range other.List() { s.Add(i) } } @@ -46,17 +46,6 @@ func (s IDSet) List() []ID { return ret } -func (s IDSet) Enumerate() <-chan ID { - ret := make(chan ID) - go func() { - defer close(ret) - for i := range s { - ret <- i - } - }() - return ret -} - func (s IDSet) ContainsAny(ids ...ID) bool { for _, i := range ids { _, ok := s[i] diff --git a/test/integration/mime_type_detection_test.go b/test/integration/mime_type_detection_test.go index abfee547..eb43edd7 100644 --- a/test/integration/mime_type_detection_test.go +++ b/test/integration/mime_type_detection_test.go @@ -22,7 +22,7 @@ func TestContentMIMETypeDetection(t *testing.T) { } for mimeType, paths := range pathsByMIMEType { - refs, err := img.SquashedSearchContext().SearchByMIMEType(mimeType) + refs, err := img.SquashedSearchContext.SearchByMIMEType(mimeType) assert.NoError(t, err) assert.NotZero(t, len(refs), "found no refs for type=%q", mimeType) for _, ref := range refs { From efce1d4a80ba7fef00a31b9ffc13f35307c0b464 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Wed, 1 Feb 2023 14:24:04 -0500 Subject: [PATCH 13/35] add tests 
for filetree.Index Signed-off-by: Alex Goodman --- pkg/filetree/index_test.go | 600 +++++++++++++++++++++++++++++++++++++ 1 file changed, 600 insertions(+) create mode 100644 pkg/filetree/index_test.go diff --git a/pkg/filetree/index_test.go b/pkg/filetree/index_test.go new file mode 100644 index 00000000..436cb767 --- /dev/null +++ b/pkg/filetree/index_test.go @@ -0,0 +1,600 @@ +//go:build !windows +// +build !windows + +package filetree + +import ( + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "testing" + + "github.com/anchore/stereoscope/pkg/file" +) + +func commonIndexFixture(t *testing.T) Index { + t.Helper() + + tree := NewFileTree() + idx := NewIndex() + + addDir := func(path file.Path) { + ref, err := tree.AddDir(path) + require.NoError(t, err, "failed to add DIR reference to index") + require.NotNil(t, ref, "failed to add DIR reference to index (nil ref") + idx.Add(*ref, file.Metadata{Path: string(path), Type: file.TypeDir, IsDir: true}) + } + + addFile := func(path file.Path) { + ref, err := tree.AddFile(path) + require.NoError(t, err, "failed to add FILE reference to index") + require.NotNil(t, ref, "failed to add FILE reference to index (nil ref") + idx.Add(*ref, file.Metadata{Path: string(path), Type: file.TypeReg, MIMEType: "text/plain"}) + } + + addLink := func(from, to file.Path) { + ref, err := tree.AddSymLink(from, to) + require.NoError(t, err, "failed to add LINK reference to index") + require.NotNil(t, ref, "failed to add LINK reference to index (nil ref") + idx.Add(*ref, file.Metadata{Path: string(from), LinkDestination: string(to), Type: file.TypeSymlink}) + } + + // mkdir -p path/branch.d/one + // mkdir -p path/branch.d/two + // mkdir -p path/common + + // note: we need to add all paths explicitly to the index + addDir("/path") + addDir("/path/branch.d") + addDir("/path/branch.d/one") + addDir("/path/branch.d/two") + 
addDir("/path/common") + + // echo "first file" > path/branch.d/one/file-1.txt + // echo "forth file" > path/branch.d/one/file-4.d + // echo "multi ext file" > path/branch.d/one/file-4.tar.gz + // echo "hidden file" > path/branch.d/one/.file-4.tar.gz + + addFile("/path/branch.d/one/file-1.txt") + addFile("/path/branch.d/one/file-4.d") + addFile("/path/branch.d/one/file-4.tar.gz") + addFile("/path/branch.d/one/.file-4.tar.gz") + + // ln -s path/branch.d path/common/branch.d + // ln -s path/branch.d path/common/branch + // ln -s path/branch.d/one/file-4.d path/common/file-4 + // ln -s path/branch.d/one/file-1.txt path/common/file-1.d + + addLink("/path/common/branch.d", "path/branch.d") + addLink("/path/common/branch", "path/branch.d") + addLink("/path/common/file-4", "path/branch.d/one/file-4.d") + addLink("/path/common/file-1.d", "path/branch.d/one/file-1.txt") + + // echo "second file" > path/branch.d/two/file-2.txt + // echo "third file" > path/file-3.txt + + addFile("/path/branch.d/two/file-2.txt") + addFile("/path/file-3.txt") + + return idx +} + +func Test_fileExtensions(t *testing.T) { + tests := []struct { + name string + path string + want []string + }{ + { + name: "empty", + path: "", + }, + { + name: "directory", + path: "/somewhere/to/nowhere/", + }, + { + name: "directory with ext", + path: "/somewhere/to/nowhere.d/", + }, + { + name: "single extension", + path: "/somewhere/to/my.tar", + want: []string{".tar"}, + }, + { + name: "multiple extensions", + path: "/somewhere/to/my.tar.gz", + want: []string{".gz", ".tar.gz"}, + }, + { + name: "ignore . prefix", + path: "/somewhere/to/.my.tar.gz", + want: []string{".gz", ".tar.gz"}, + }, + { + name: "ignore more . prefixes", + path: "/somewhere/to/...my.tar.gz", + want: []string{".gz", ".tar.gz"}, + }, + { + name: "ignore . 
suffixes", + path: "/somewhere/to/my.tar.gz...", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, fileExtensions(tt.path)) + }) + } +} + +func TestFileCatalog_GetByExtension(t *testing.T) { + fileIndex := commonIndexFixture(t) + + tests := []struct { + name string + input string + want []IndexEntry + wantErr require.ErrorAssertionFunc + }{ + { + name: "get simple extension", + input: ".txt", + want: []IndexEntry{ + { + Reference: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-1.txt", + Type: file.TypeReg, + MIMEType: "text/plain", + }, + }, + { + + Reference: file.Reference{RealPath: "/path/branch.d/two/file-2.txt"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/two/file-2.txt", + Type: file.TypeReg, + MIMEType: "text/plain", + }, + }, + { + Reference: file.Reference{RealPath: "/path/file-3.txt"}, + Metadata: file.Metadata{ + Path: "/path/file-3.txt", + Type: file.TypeReg, + MIMEType: "text/plain", + }, + }, + }, + }, + { + name: "get mixed type extension", + input: ".d", + want: []IndexEntry{ + { + + Reference: file.Reference{RealPath: "/path/branch.d"}, + Metadata: file.Metadata{ + Path: "/path/branch.d", + Type: file.TypeDir, + IsDir: true, + }, + }, + { + + Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.d"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-4.d", + Type: file.TypeReg, + MIMEType: "text/plain", + }, + }, + + { + + Reference: file.Reference{RealPath: "/path/common/branch.d"}, + Metadata: file.Metadata{ + Path: "/path/common/branch.d", + LinkDestination: "path/branch.d", + Type: file.TypeSymlink, + }, + }, + { + + Reference: file.Reference{RealPath: "/path/common/file-1.d"}, + Metadata: file.Metadata{ + Path: "/path/common/file-1.d", + LinkDestination: "path/branch.d/one/file-1.txt", + Type: file.TypeSymlink, + }, + }, + }, + }, + { + name: "get long extension", + input: ".tar.gz", + want: 
[]IndexEntry{ + { + Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.tar.gz"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-4.tar.gz", + Type: file.TypeReg, + MIMEType: "text/plain", + }, + }, + { + Reference: file.Reference{RealPath: "/path/branch.d/one/.file-4.tar.gz"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/.file-4.tar.gz", + Type: file.TypeReg, + MIMEType: "text/plain", + }, + }, + }, + }, + { + name: "get short extension", + input: ".gz", + want: []IndexEntry{ + { + Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.tar.gz"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-4.tar.gz", + Type: file.TypeReg, + MIMEType: "text/plain", + }, + }, + { + Reference: file.Reference{RealPath: "/path/branch.d/one/.file-4.tar.gz"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/.file-4.tar.gz", + Type: file.TypeReg, + MIMEType: "text/plain", + }, + }, + }, + }, + { + name: "get non-existent extension", + input: ".blerg-123", + want: []IndexEntry{}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = require.NoError + } + actual, err := fileIndex.GetByExtension(tt.input) + tt.wantErr(t, err) + if err != nil { + return + } + if d := cmp.Diff(tt.want, actual, + cmpopts.EquateEmpty(), + cmpopts.IgnoreUnexported(file.Reference{}), + cmpopts.IgnoreFields(file.Metadata{}, "Mode", "GroupID", "UserID", "Size"), + ); d != "" { + t.Errorf("diff: %s", d) + } + }) + } +} + +func TestFileCatalog_GetByBasename(t *testing.T) { + fileIndex := commonIndexFixture(t) + + tests := []struct { + name string + input string + want []IndexEntry + wantErr require.ErrorAssertionFunc + }{ + { + name: "get existing file name", + input: "file-1.txt", + want: []IndexEntry{ + { + Reference: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-1.txt", + Type: file.TypeReg, + MIMEType: "text/plain", + }, 
+ }, + }, + }, + { + name: "get non-existing name", + input: "file-11.txt", + want: []IndexEntry{}, + }, + { + name: "get directory name", + input: "branch.d", + want: []IndexEntry{ + { + Reference: file.Reference{RealPath: "/path/branch.d"}, + Metadata: file.Metadata{ + Path: "/path/branch.d", + Type: file.TypeDir, + IsDir: true, + }, + }, + { + Reference: file.Reference{RealPath: "/path/common/branch.d"}, + Metadata: file.Metadata{ + Path: "/path/common/branch.d", + LinkDestination: "path/branch.d", + Type: file.TypeSymlink, + }, + }, + }, + }, + { + name: "get symlink name", + input: "file-1.d", + want: []IndexEntry{ + { + Reference: file.Reference{RealPath: "/path/common/file-1.d"}, + Metadata: file.Metadata{ + Path: "/path/common/file-1.d", + LinkDestination: "path/branch.d/one/file-1.txt", + Type: file.TypeSymlink, + }, + }, + }, + }, + { + name: "get basename with path expression", + input: "somewhere/file-1.d", + wantErr: require.Error, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = require.NoError + } + actual, err := fileIndex.GetByBasename(tt.input) + tt.wantErr(t, err) + if err != nil { + return + } + if d := cmp.Diff(tt.want, actual, + cmpopts.EquateEmpty(), + cmpopts.IgnoreUnexported(file.Reference{}), + cmpopts.IgnoreFields(file.Metadata{}, "Mode", "GroupID", "UserID", "Size"), + ); d != "" { + t.Errorf("diff: %s", d) + } + }) + } +} + +func TestFileCatalog_GetByBasenameGlob(t *testing.T) { + fileIndex := commonIndexFixture(t) + + tests := []struct { + name string + input string + want []IndexEntry + wantErr require.ErrorAssertionFunc + }{ + { + name: "get existing file name", + input: "file-1.*", + want: []IndexEntry{ + { + Reference: file.Reference{RealPath: "/path/common/file-1.d"}, + Metadata: file.Metadata{ + Path: "/path/common/file-1.d", + LinkDestination: "path/branch.d/one/file-1.txt", + Type: file.TypeSymlink, + }, + }, + { + Reference: file.Reference{RealPath: 
"/path/branch.d/one/file-1.txt"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-1.txt", + Type: file.TypeReg, + MIMEType: "text/plain", + }, + }, + }, + }, + { + name: "get non-existing name", + input: "blerg-*.txt", + want: []IndexEntry{}, + }, + { + name: "get directory name", + input: "bran*.d", + want: []IndexEntry{ + { + Reference: file.Reference{RealPath: "/path/branch.d"}, + Metadata: file.Metadata{ + Path: "/path/branch.d", + Type: file.TypeDir, + IsDir: true, + }, + }, + { + Reference: file.Reference{RealPath: "/path/common/branch.d"}, + Metadata: file.Metadata{ + Path: "/path/common/branch.d", + LinkDestination: "path/branch.d", + Type: file.TypeSymlink, + }, + }, + }, + }, + { + name: "get symlink name", + input: "file?1.d", + want: []IndexEntry{ + { + Reference: file.Reference{RealPath: "/path/common/file-1.d"}, + Metadata: file.Metadata{ + Path: "/path/common/file-1.d", + LinkDestination: "path/branch.d/one/file-1.txt", + Type: file.TypeSymlink, + }, + }, + }, + }, + { + name: "get basename with path expression", + input: "somewhere/file?1.d", + wantErr: require.Error, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = require.NoError + } + actual, err := fileIndex.GetByBasenameGlob(tt.input) + tt.wantErr(t, err) + if err != nil { + return + } + if d := cmp.Diff(tt.want, actual, + cmpopts.EquateEmpty(), + cmpopts.IgnoreUnexported(file.Reference{}), + cmpopts.IgnoreFields(file.Metadata{}, "Mode", "GroupID", "UserID", "Size"), + ); d != "" { + t.Errorf("diff: %s", d) + } + }) + } +} + +func TestFileCatalog_GetByMimeType(t *testing.T) { + fileIndex := commonIndexFixture(t) + + tests := []struct { + name string + input string + want []IndexEntry + wantErr require.ErrorAssertionFunc + }{ + { + name: "get existing file mimetype", + input: "text/plain", + want: []IndexEntry{ + { + Reference: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, + Metadata: file.Metadata{ + 
Path: "/path/branch.d/one/file-1.txt", + Type: file.TypeReg, + MIMEType: "text/plain", + }, + }, + { + Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.d"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-4.d", + Type: file.TypeReg, + MIMEType: "text/plain", + }, + }, + { + Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.tar.gz"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-4.tar.gz", + Type: file.TypeReg, + MIMEType: "text/plain", + }, + }, + { + Reference: file.Reference{RealPath: "/path/branch.d/one/.file-4.tar.gz"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/.file-4.tar.gz", + Type: file.TypeReg, + MIMEType: "text/plain", + }, + }, + { + Reference: file.Reference{RealPath: "/path/branch.d/two/file-2.txt"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/two/file-2.txt", + Type: file.TypeReg, + MIMEType: "text/plain", + }, + }, + { + Reference: file.Reference{RealPath: "/path/file-3.txt"}, + Metadata: file.Metadata{ + Path: "/path/file-3.txt", + Type: file.TypeReg, + MIMEType: "text/plain", + }, + }, + }, + }, + { + name: "get non-existing mimetype", + input: "text/bogus", + want: []IndexEntry{}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = require.NoError + } + actual, err := fileIndex.GetByMIMEType(tt.input) + tt.wantErr(t, err) + if err != nil { + return + } + if d := cmp.Diff(tt.want, actual, + cmpopts.EquateEmpty(), + cmpopts.IgnoreUnexported(file.Reference{}), + cmpopts.IgnoreFields(file.Metadata{}, "Mode", "GroupID", "UserID", "Size"), + ); d != "" { + t.Errorf("diff: %s", d) + } + }) + } +} + +func TestFileCatalog_GetBasenames(t *testing.T) { + fileIndex := commonIndexFixture(t) + + tests := []struct { + name string + want []string + }{ + { + name: "go case", + want: []string{ + ".file-4.tar.gz", + "branch", + "branch.d", + "common", + "file-1.d", + "file-1.txt", + "file-2.txt", + "file-3.txt", + "file-4", + 
"file-4.d", + "file-4.tar.gz", + "one", + "path", + "two", + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + actual := fileIndex.Basenames() + assert.ElementsMatchf(t, tt.want, actual, "diff: %s", cmp.Diff(tt.want, actual)) + }) + } +} From 0b18b2565abef9f4206f64d005e35ea46c1456d9 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Wed, 1 Feb 2023 15:47:51 -0500 Subject: [PATCH 14/35] add search by parent basename and fix requirements filtering Signed-off-by: Alex Goodman --- pkg/file/reference.go | 19 ++++++++ pkg/filetree/glob_parser.go | 27 +++++++++++ pkg/filetree/glob_parser_test.go | 37 +++++++++++++-- pkg/filetree/search.go | 78 +++++++++++++++++++++++++------- pkg/filetree/search_test.go | 66 ++++++++++++++++++++++----- 5 files changed, 196 insertions(+), 31 deletions(-) diff --git a/pkg/file/reference.go b/pkg/file/reference.go index ca2c12ff..7113fee8 100644 --- a/pkg/file/reference.go +++ b/pkg/file/reference.go @@ -19,6 +19,8 @@ type ReferenceAccessVia struct { LeafLinkResolution []ReferenceAccess } +type ReferenceAccessVias []ReferenceAccessVia + func (f *ReferenceAccessVia) HasReference() bool { if f == nil { return false @@ -49,6 +51,23 @@ func (f *ReferenceAccessVia) AllPaths() []Path { return results } +func (f *ReferenceAccessVia) AllRequestPaths() []Path { + set := strset.New() + set.Add(string(f.RequestPath)) + for _, p := range f.LeafLinkResolution { + set.Add(string(p.RequestPath)) + } + + paths := set.List() + sort.Strings(paths) + + var results []Path + for _, p := range paths { + results = append(results, Path(p)) + } + return results +} + // RequestResolutionPath represents the traversal through the filesystem to access to current reference, including all symlink and hardlink resolution. 
func (f *ReferenceAccessVia) RequestResolutionPath() []Path { var paths []Path diff --git a/pkg/filetree/glob_parser.go b/pkg/filetree/glob_parser.go index 0ef8dcc6..e8522a0f 100644 --- a/pkg/filetree/glob_parser.go +++ b/pkg/filetree/glob_parser.go @@ -11,6 +11,7 @@ const ( searchByExtension searchByBasename searchByBasenameGlob + searchByParentBasename ) type searchBasis int @@ -27,6 +28,8 @@ func (s searchBasis) String() string { return "basename" case searchByBasenameGlob: return "basename-glob" + case searchByParentBasename: + return "parent-basename" } return "unknown search basis" } @@ -37,6 +40,14 @@ type searchRequest struct { requirement string } +func (s searchRequest) String() string { + value := s.searchBasis.String() + ": " + s.value + if s.requirement != "" { + value += " (requirement: " + s.requirement + ")" + } + return value +} + func parseGlob(glob string) []searchRequest { glob = cleanGlob(glob) @@ -50,6 +61,22 @@ func parseGlob(glob string) []searchRequest { } beforeBasename, basename := splitAtBasename(glob) + + if basename == "*" { + _, nestedBasename := splitAtBasename(beforeBasename) + if !strings.ContainsAny(nestedBasename, "*?[]{}") { + // special case: glob is a parent glob + requests := []searchRequest{ + { + searchBasis: searchByParentBasename, + value: nestedBasename, + requirement: beforeBasename, + }, + } + return requests + } + } + requests := parseGlobBasename(basename) for i := range requests { applyRequirement(&requests[i], beforeBasename, glob) diff --git a/pkg/filetree/glob_parser_test.go b/pkg/filetree/glob_parser_test.go index b3727cf0..5999635d 100644 --- a/pkg/filetree/glob_parser_test.go +++ b/pkg/filetree/glob_parser_test.go @@ -233,15 +233,26 @@ func Test_parseGlob(t *testing.T) { }, { name: "fallback to full glob search", - glob: "**/foo/bar/*", + glob: "**/foo/bar/**?/**", want: []searchRequest{ { searchBasis: searchByGlob, - value: "**/foo/bar/*", + value: "**/foo/bar/*?/**", }, }, }, - // edge cases + { + name: "use 
parent basename for directory contents", + glob: "**/foo/bar/*", + want: []searchRequest{ + { + searchBasis: searchByParentBasename, + value: "bar", + requirement: "**/foo/bar", + }, + }, + }, + // special cases { name: "empty string", glob: "", @@ -282,6 +293,26 @@ func Test_parseGlob(t *testing.T) { }, }, }, + { + name: "ends with *", + glob: "**/foo/b*r/*", + want: []searchRequest{ + { + searchBasis: searchByGlob, + value: "**/foo/b*r/*", + }, + }, + }, + { + name: "ends with ***", + glob: "**/foo/b*r/**", + want: []searchRequest{ + { + searchBasis: searchByGlob, + value: "**/foo/b*r/**", + }, + }, + }, { name: "spaces around everything", glob: " /foo/b*r/ .txt ", diff --git a/pkg/filetree/search.go b/pkg/filetree/search.go index 0ce4070c..adeebeac 100644 --- a/pkg/filetree/search.go +++ b/pkg/filetree/search.go @@ -27,8 +27,6 @@ type searchContext struct { // the following enables correct link resolution when searching via the index linkForwardRef map[node.ID]node.ID // {link-node-id: link-destination-node-id} linkBackwardRefs map[node.ID]node.IDSet // {link-destination-node-id: str([link-node-id, ...])} - // allLinks node.IDSet // set([link-node-id, ...]) // all links, regardless of whether they are resolved or not - // unresolvedLinks node.IDSet // set([link-node-id, ...]) // we have not figured the forward/backward refs for these links yet } func NewSearchContext(tree *FileTree, index Index) Searcher { @@ -176,6 +174,9 @@ func (sc searchContext) searchByGlob(request searchRequest, options ...LinkResol return nil, err } return refs, nil + case searchByParentBasename: + return sc.searchByParentBasename(request) + case searchByGlob: log.WithFields("glob", request.value).Trace("glob provided is an expensive search, consider using a more specific indexed search") @@ -186,6 +187,46 @@ func (sc searchContext) searchByGlob(request searchRequest, options ...LinkResol return nil, fmt.Errorf("invalid search request: %+v", request.searchBasis) } +func (sc searchContext) 
searchByParentBasename(request searchRequest) ([]file.ReferenceAccessVia, error) { + indexes, err := sc.index.GetByBasename(request.value) + if err != nil { + return nil, fmt.Errorf("unable to search by extension=%q: %w", request.value, err) + } + refs, err := sc.referencesWithRequirement(request.requirement, indexes) + if err != nil { + return nil, err + } + + var results []file.ReferenceAccessVia + for _, ref := range refs { + paths, err := sc.tree.ListPaths(ref.RequestPath) + if err != nil { + // this may not be a directory, that's alright, just continue... + continue + } + for _, p := range paths { + _, nestedRef, err := sc.tree.File(p, FollowBasenameLinks) + if err != nil { + return nil, fmt.Errorf("unable to fetch file reference from parent path %q for path=%q: %w", ref.RequestPath, p, err) + } + if !nestedRef.HasReference() { + continue + } + // note: the requirement was written for the parent, so we need to consider the new + // child path by adding /* to match all children + matches, err := matchesRequirement(*nestedRef, request.requirement+"/*") + if err != nil { + return nil, err + } + if matches { + results = append(results, *nestedRef) + } + } + } + + return results, nil +} + func (sc searchContext) referencesWithRequirement(requirement string, entries []IndexEntry) ([]file.ReferenceAccessVia, error) { refs, err := sc.referencesInTree(entries) if err != nil { @@ -198,27 +239,32 @@ func (sc searchContext) referencesWithRequirement(requirement string, entries [] var results []file.ReferenceAccessVia for _, ref := range refs { - var foundMatchingRequirement bool - allRefPaths := ref.AllPaths() - for _, p := range allRefPaths { - matched, err := doublestar.Match(requirement, string(p)) - if err != nil { - return nil, fmt.Errorf("unable to match glob pattern=%q to path=%q: %w", requirement, p, err) - } - if matched { - foundMatchingRequirement = true - break - } + matches, err := matchesRequirement(ref, requirement) + if err != nil { + return nil, err } - 
if !foundMatchingRequirement { - continue + if matches { + results = append(results, ref) } - results = append(results, ref) } return results, nil } +func matchesRequirement(ref file.ReferenceAccessVia, requirement string) (bool, error) { + allRefPaths := ref.AllRequestPaths() + for _, p := range allRefPaths { + matched, err := doublestar.Match(requirement, string(p)) + if err != nil { + return false, fmt.Errorf("unable to match glob pattern=%q to path=%q: %w", requirement, p, err) + } + if matched { + return true, nil + } + } + return false, nil +} + type cacheRequest struct { RealPath file.Path } diff --git a/pkg/filetree/search_test.go b/pkg/filetree/search_test.go index 0d41a0a8..047a880b 100644 --- a/pkg/filetree/search_test.go +++ b/pkg/filetree/search_test.go @@ -118,11 +118,16 @@ func Test_searchContext_SearchByGlob(t *testing.T) { require.NoError(t, err) require.NotNil(t, fileRef) + toRef, err := tree.AddDir("/path/to") + require.NoError(t, err) + require.NotNil(t, toRef) + idx := NewIndex() idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeReg}) idx.Add(*linkToFileRef, file.Metadata{Type: file.TypeSymlink}) idx.Add(*linkToPathRef, file.Metadata{Type: file.TypeSymlink}) idx.Add(*doubleLinkToPathRef, file.Metadata{Type: file.TypeSymlink}) + idx.Add(*toRef, file.Metadata{Type: file.TypeDir}) defaultFields := fields{ tree: tree, @@ -142,6 +147,8 @@ func Test_searchContext_SearchByGlob(t *testing.T) { args: args{ glob: "/**/t?/fil?.txt", }, + // note: result "/link-to-file" resolves to the file but does not show up since the request path + // does not match the requirement glob want: []file.ReferenceAccessVia{ { ReferenceAccess: file.ReferenceAccess{ @@ -153,20 +160,32 @@ func Test_searchContext_SearchByGlob(t *testing.T) { }, { ReferenceAccess: file.ReferenceAccess{ - RequestPath: "/link-to-file", + RequestPath: "/link-to-path/to/file.txt", Reference: &file.Reference{ RealPath: "/path/to/file.txt", }, }, - LeafLinkResolution: 
[]file.ReferenceAccess{ - { - RequestPath: "/link-to-file", - Reference: &file.Reference{ - RealPath: "/link-to-file", - }, + }, + { + ReferenceAccess: file.ReferenceAccess{ + RequestPath: "/path/to/file.txt", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", }, }, }, + }, + }, + { + name: "ancestor access path exists", + fields: defaultFields, + args: args{ + // note: this is a glob through a symlink (ancestor). If not using the index, this will work + // just fine, since we do a full tree search. However, if using the index, this shortcut will + // dodge any ancestor symlink and will not find the file. + glob: "**/link-to-path/to/file.txt", + }, + want: []file.ReferenceAccessVia{ { ReferenceAccess: file.ReferenceAccess{ RequestPath: "/link-to-path/to/file.txt", @@ -175,6 +194,15 @@ func Test_searchContext_SearchByGlob(t *testing.T) { }, }, }, + }, + }, + { + name: "access all children", + fields: defaultFields, + args: args{ + glob: "**/path/to/*", + }, + want: []file.ReferenceAccessVia{ { ReferenceAccess: file.ReferenceAccess{ RequestPath: "/path/to/file.txt", @@ -186,13 +214,27 @@ func Test_searchContext_SearchByGlob(t *testing.T) { }, }, { - name: "ancestor access path exists", + name: "access all children as path", fields: defaultFields, args: args{ - // note: this is a glob through a symlink (ancestor). If not using the index, this will work - // just fine, since we do a full tree search. However, if using the index, this shortcut will - // dodge any ancestor symlink and will not find the file. 
- glob: "**/link-to-path/to/file.txt", + glob: "/path/to/*", + }, + want: []file.ReferenceAccessVia{ + { + ReferenceAccess: file.ReferenceAccess{ + RequestPath: "/path/to/file.txt", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", + }, + }, + }, + }, + }, + { + name: "access via symlink for all children", + fields: defaultFields, + args: args{ + glob: "**/link-to-path/to/*", }, want: []file.ReferenceAccessVia{ { From b59e4899daa235a6d0f4fc9ba58317d8aba673e0 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Wed, 1 Feb 2023 17:22:12 -0500 Subject: [PATCH 15/35] sort search results Signed-off-by: Alex Goodman --- pkg/file/reference.go | 133 ------------------------ pkg/file/reference_access.go | 162 ++++++++++++++++++++++++++++++ pkg/file/reference_access_test.go | 131 ++++++++++++++++++++++++ pkg/filetree/glob_parser.go | 6 +- pkg/filetree/glob_parser_test.go | 1 + pkg/filetree/search.go | 13 ++- pkg/filetree/search_test.go | 55 +++++++++- 7 files changed, 362 insertions(+), 139 deletions(-) create mode 100644 pkg/file/reference_access.go create mode 100644 pkg/file/reference_access_test.go diff --git a/pkg/file/reference.go b/pkg/file/reference.go index 7113fee8..27da47bc 100644 --- a/pkg/file/reference.go +++ b/pkg/file/reference.go @@ -2,147 +2,14 @@ package file import ( "fmt" - "sort" - - "github.com/scylladb/go-set/strset" ) -// ReferenceAccess represents the fetching of a possibly non-existent file, and how it was accessed. -type ReferenceAccess struct { - RequestPath Path - *Reference -} - -// ReferenceAccessVia represents a possibly non-existent file, and how it was accessed, including all symlink and hardlink resolution. 
-type ReferenceAccessVia struct { - ReferenceAccess - LeafLinkResolution []ReferenceAccess -} - -type ReferenceAccessVias []ReferenceAccessVia - -func (f *ReferenceAccessVia) HasReference() bool { - if f == nil { - return false - } - return f.Reference != nil -} - -func (f *ReferenceAccessVia) AllPaths() []Path { - set := strset.New() - set.Add(string(f.RequestPath)) - if f.Reference != nil { - set.Add(string(f.Reference.RealPath)) - } - for _, p := range f.LeafLinkResolution { - set.Add(string(p.RequestPath)) - if p.Reference != nil { - set.Add(string(p.Reference.RealPath)) - } - } - - paths := set.List() - sort.Strings(paths) - - var results []Path - for _, p := range paths { - results = append(results, Path(p)) - } - return results -} - -func (f *ReferenceAccessVia) AllRequestPaths() []Path { - set := strset.New() - set.Add(string(f.RequestPath)) - for _, p := range f.LeafLinkResolution { - set.Add(string(p.RequestPath)) - } - - paths := set.List() - sort.Strings(paths) - - var results []Path - for _, p := range paths { - results = append(results, Path(p)) - } - return results -} - -// RequestResolutionPath represents the traversal through the filesystem to access to current reference, including all symlink and hardlink resolution. 
-func (f *ReferenceAccessVia) RequestResolutionPath() []Path { - var paths []Path - var firstPath Path - var lastLinkResolutionIsDead bool - - if string(f.RequestPath) != "" { - firstPath = f.RequestPath - paths = append(paths, f.RequestPath) - } - for i, p := range f.LeafLinkResolution { - if i == 0 && p.RequestPath == f.RequestPath { - // ignore link resolution that starts with the same user requested path - continue - } - if firstPath == "" { - firstPath = p.RequestPath - } - - paths = append(paths, p.RequestPath) - - if i == len(f.LeafLinkResolution)-1 { - // we've reached the final link resolution - if p.Reference == nil { - lastLinkResolutionIsDead = true - } - } - } - if f.HasReference() && firstPath != f.Reference.RealPath && !lastLinkResolutionIsDead { - // we've reached the final reference that was resolved - // we should only do this if there was a link resolution - paths = append(paths, f.Reference.RealPath) - } - return paths -} - -// ResolutionReferences represents the traversal through the filesystem to access to current reference, including all symlink and hardlink resolution. -func (f *ReferenceAccessVia) ResolutionReferences() []Reference { - var refs []Reference - var lastLinkResolutionIsDead bool - - for i, p := range f.LeafLinkResolution { - if p.Reference != nil { - refs = append(refs, *p.Reference) - } - if i == len(f.LeafLinkResolution)-1 { - // we've reached the final link resolution - if p.Reference == nil { - lastLinkResolutionIsDead = true - } - } - } - if f.Reference != nil && !lastLinkResolutionIsDead { - refs = append(refs, *f.Reference) - } - return refs -} - // Reference represents a unique file. This is useful when path is not good enough (i.e. 
you have the same file path for two files in two different container image layers, and you need to be able to distinguish them apart) type Reference struct { id ID RealPath Path // file path with NO symlinks or hardlinks in constituent paths } -// NewFileReferenceVia shows how a reference was accessed. -func NewFileReferenceVia(path Path, ref *Reference, leafs []ReferenceAccess) *ReferenceAccessVia { - return &ReferenceAccessVia{ - ReferenceAccess: ReferenceAccess{ - RequestPath: path, - Reference: ref, - }, - LeafLinkResolution: leafs, - } -} - // NewFileReference creates a new unique file reference for the given path. func NewFileReference(path Path) *Reference { nextID++ diff --git a/pkg/file/reference_access.go b/pkg/file/reference_access.go new file mode 100644 index 00000000..8017e88f --- /dev/null +++ b/pkg/file/reference_access.go @@ -0,0 +1,162 @@ +package file + +import ( + "sort" + + "github.com/scylladb/go-set/strset" +) + +// ReferenceAccess represents the fetching of a possibly non-existent file, and how it was accessed. +type ReferenceAccess struct { + RequestPath Path + *Reference +} + +// ReferenceAccessVia represents a possibly non-existent file, and how it was accessed, including all symlink and hardlink resolution. +type ReferenceAccessVia struct { + ReferenceAccess + LeafLinkResolution []ReferenceAccess +} + +type ReferenceAccessVias []ReferenceAccessVia + +// NewFileReferenceVia shows how a reference was accessed. 
+func NewFileReferenceVia(path Path, ref *Reference, leafs []ReferenceAccess) *ReferenceAccessVia { + return &ReferenceAccessVia{ + ReferenceAccess: ReferenceAccess{ + RequestPath: path, + Reference: ref, + }, + LeafLinkResolution: leafs, + } +} + +func (f ReferenceAccessVias) Len() int { + return len(f) +} + +func (f ReferenceAccessVias) Less(i, j int) bool { + ith := f[i] + jth := f[j] + + ithIsReal := ith.Reference != nil && ith.Reference.RealPath == ith.RequestPath + jthIsReal := jth.Reference != nil && jth.Reference.RealPath == jth.RequestPath + + switch { + case ithIsReal && !jthIsReal: + return true + case !ithIsReal && jthIsReal: + return false + } + + return ith.RequestPath < jth.RequestPath +} + +func (f ReferenceAccessVias) Swap(i, j int) { + f[i], f[j] = f[j], f[i] +} + +func (f *ReferenceAccessVia) HasReference() bool { + if f == nil { + return false + } + return f.Reference != nil +} + +func (f *ReferenceAccessVia) AllPaths() []Path { + set := strset.New() + set.Add(string(f.RequestPath)) + if f.Reference != nil { + set.Add(string(f.Reference.RealPath)) + } + for _, p := range f.LeafLinkResolution { + set.Add(string(p.RequestPath)) + if p.Reference != nil { + set.Add(string(p.Reference.RealPath)) + } + } + + paths := set.List() + sort.Strings(paths) + + var results []Path + for _, p := range paths { + results = append(results, Path(p)) + } + return results +} + +func (f *ReferenceAccessVia) AllRequestPaths() []Path { + set := strset.New() + set.Add(string(f.RequestPath)) + for _, p := range f.LeafLinkResolution { + set.Add(string(p.RequestPath)) + } + + paths := set.List() + sort.Strings(paths) + + var results []Path + for _, p := range paths { + results = append(results, Path(p)) + } + return results +} + +// RequestResolutionPath represents the traversal through the filesystem to access to current reference, including all symlink and hardlink resolution. 
+func (f *ReferenceAccessVia) RequestResolutionPath() []Path { + var paths []Path + var firstPath Path + var lastLinkResolutionIsDead bool + + if string(f.RequestPath) != "" { + firstPath = f.RequestPath + paths = append(paths, f.RequestPath) + } + for i, p := range f.LeafLinkResolution { + if i == 0 && p.RequestPath == f.RequestPath { + // ignore link resolution that starts with the same user requested path + continue + } + if firstPath == "" { + firstPath = p.RequestPath + } + + paths = append(paths, p.RequestPath) + + if i == len(f.LeafLinkResolution)-1 { + // we've reached the final link resolution + if p.Reference == nil { + lastLinkResolutionIsDead = true + } + } + } + if f.HasReference() && firstPath != f.Reference.RealPath && !lastLinkResolutionIsDead { + // we've reached the final reference that was resolved + // we should only do this if there was a link resolution + paths = append(paths, f.Reference.RealPath) + } + return paths +} + +// ResolutionReferences represents the traversal through the filesystem to access to current reference, including all symlink and hardlink resolution. 
+func (f *ReferenceAccessVia) ResolutionReferences() []Reference { + var refs []Reference + var lastLinkResolutionIsDead bool + + for i, p := range f.LeafLinkResolution { + if p.Reference != nil { + refs = append(refs, *p.Reference) + } + if i == len(f.LeafLinkResolution)-1 { + // we've reached the final link resolution + if p.Reference == nil { + lastLinkResolutionIsDead = true + } + } + } + if f.Reference != nil && !lastLinkResolutionIsDead { + refs = append(refs, *f.Reference) + } + return refs +} diff --git a/pkg/file/reference_access_test.go b/pkg/file/reference_access_test.go new file mode 100644 index 00000000..34540b9f --- /dev/null +++ b/pkg/file/reference_access_test.go @@ -0,0 +1,131 @@ +package file + +import ( + "github.com/stretchr/testify/assert" + "sort" + "testing" +) + +func TestReferenceAccessVias_Less(t *testing.T) { + + realA := ReferenceAccessVia{ + ReferenceAccess: ReferenceAccess{ + RequestPath: "/parent/a", + Reference: &Reference{ + RealPath: "/parent/a", + }, + }, + } + + realB := ReferenceAccessVia{ + ReferenceAccess: ReferenceAccess{ + RequestPath: "/parent/b", + Reference: &Reference{ + RealPath: "/parent/b", + }, + }, + } + + linkToA := ReferenceAccessVia{ + ReferenceAccess: ReferenceAccess{ + RequestPath: "/parent-link/a", + Reference: &Reference{ + RealPath: "/a", + }, + }, + } + + linkToB := ReferenceAccessVia{ + ReferenceAccess: ReferenceAccess{ + RequestPath: "/parent-link/b", + Reference: &Reference{ + RealPath: "/b", + }, + }, + } + + tests := []struct { + name string + subject []ReferenceAccessVia + want []ReferenceAccessVia + }{ + { + name: "references to real files are preferred first", + subject: []ReferenceAccessVia{ + linkToA, + realA, + }, + want: []ReferenceAccessVia{ + realA, + linkToA, + }, + }, + { + name: "real files are treated equally by request name", + subject: []ReferenceAccessVia{ + realB, + realA, + }, + want: []ReferenceAccessVia{ + realA, + realB, + }, + }, + { + name: "link files are treated equally by 
request name", + subject: []ReferenceAccessVia{ + linkToB, + linkToA, + }, + want: []ReferenceAccessVia{ + linkToA, + linkToB, + }, + }, + { + name: "regression", + subject: []ReferenceAccessVia{ + { + ReferenceAccess: ReferenceAccess{ + RequestPath: "/parent-link/file-4.txt", + Reference: &Reference{ + RealPath: "/parent/file-4.txt", + }, + }, + }, + { + ReferenceAccess: ReferenceAccess{ + RequestPath: "/parent/file-4.txt", + Reference: &Reference{ + RealPath: "/parent/file-4.txt", + }, + }, + }, + }, + want: []ReferenceAccessVia{ + { + ReferenceAccess: ReferenceAccess{ + RequestPath: "/parent/file-4.txt", + Reference: &Reference{ + RealPath: "/parent/file-4.txt", + }, + }, + }, + { + ReferenceAccess: ReferenceAccess{ + RequestPath: "/parent-link/file-4.txt", + Reference: &Reference{ + RealPath: "/parent/file-4.txt", + }, + }, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + sort.Sort(ReferenceAccessVias(tt.subject)) + assert.Equal(t, tt.want, tt.subject) + }) + } +} diff --git a/pkg/filetree/glob_parser.go b/pkg/filetree/glob_parser.go index e8522a0f..8579d2ed 100644 --- a/pkg/filetree/glob_parser.go +++ b/pkg/filetree/glob_parser.go @@ -114,7 +114,9 @@ func applyRequirement(request *searchRequest, beforeBasename, glob string) { requirement = glob switch beforeBasename { case "**", request.requirement: - requirement = "" + if request.searchBasis != searchByExtension { + requirement = "" + } } } else { requirement = "" @@ -150,7 +152,7 @@ func parseGlobBasename(basenameInput string) []searchRequest { } if !strings.ContainsAny(basenameInput, "*?") { - // special case, this is plain extension + // special case, this is plain basename return []searchRequest{ { searchBasis: searchByBasename, diff --git a/pkg/filetree/glob_parser_test.go b/pkg/filetree/glob_parser_test.go index 5999635d..a770f61a 100644 --- a/pkg/filetree/glob_parser_test.go +++ b/pkg/filetree/glob_parser_test.go @@ -49,6 +49,7 @@ func Test_parseGlob(t *testing.T) 
{ { searchBasis: searchByExtension, value: ".txt", + requirement: "**/*.txt", }, }, }, diff --git a/pkg/filetree/search.go b/pkg/filetree/search.go index adeebeac..eb1340b7 100644 --- a/pkg/filetree/search.go +++ b/pkg/filetree/search.go @@ -107,7 +107,14 @@ func (sc searchContext) SearchByMIMEType(mimeTypes ...string) ([]file.ReferenceA fileEntries = append(fileEntries, entries...) } - return sc.referencesInTree(fileEntries) + refs, err := sc.referencesInTree(fileEntries) + if err != nil { + return nil, err + } + + sort.Sort(file.ReferenceAccessVias(refs)) + + return refs, nil } // add case for status.d/* like things that hook up directly into filetree.ListPaths() @@ -224,6 +231,8 @@ func (sc searchContext) searchByParentBasename(request searchRequest) ([]file.Re } } + sort.Sort(file.ReferenceAccessVias(results)) + return results, nil } @@ -248,6 +257,8 @@ func (sc searchContext) referencesWithRequirement(requirement string, entries [] } } + sort.Sort(file.ReferenceAccessVias(results)) + return results, nil } diff --git a/pkg/filetree/search_test.go b/pkg/filetree/search_test.go index 047a880b..82ed9dc7 100644 --- a/pkg/filetree/search_test.go +++ b/pkg/filetree/search_test.go @@ -152,7 +152,7 @@ func Test_searchContext_SearchByGlob(t *testing.T) { want: []file.ReferenceAccessVia{ { ReferenceAccess: file.ReferenceAccess{ - RequestPath: "/double-link-to-path/to/file.txt", + RequestPath: "/path/to/file.txt", Reference: &file.Reference{ RealPath: "/path/to/file.txt", }, @@ -160,7 +160,7 @@ func Test_searchContext_SearchByGlob(t *testing.T) { }, { ReferenceAccess: file.ReferenceAccess{ - RequestPath: "/link-to-path/to/file.txt", + RequestPath: "/double-link-to-path/to/file.txt", Reference: &file.Reference{ RealPath: "/path/to/file.txt", }, @@ -168,7 +168,7 @@ func Test_searchContext_SearchByGlob(t *testing.T) { }, { ReferenceAccess: file.ReferenceAccess{ - RequestPath: "/path/to/file.txt", + RequestPath: "/link-to-path/to/file.txt", Reference: &file.Reference{ 
RealPath: "/path/to/file.txt", }, @@ -312,6 +312,55 @@ func Test_searchContext_SearchByGlob(t *testing.T) { }, }, }, + { + name: "by extension", + fields: defaultFields, + args: args{ + // note: this is a glob through a symlink (ancestor). If not using the index, this will work + // just fine, since we do a full tree search. However, if using the index, this shortcut will + // dodge any ancestor symlink and will not find the file. + glob: "**/*.txt", + }, + want: []file.ReferenceAccessVia{ + { + ReferenceAccess: file.ReferenceAccess{ + RequestPath: "/path/to/file.txt", + Reference: &file.Reference{RealPath: "/path/to/file.txt"}, + }, + }, + { + ReferenceAccess: file.ReferenceAccess{ + RequestPath: "/double-link-to-path/to/file.txt", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", + }, + }, + }, + // note: this is NOT expected since the input glob does not match against the request path + //{ + // ReferenceAccess: file.ReferenceAccess{ + // RequestPath: "/link-to-file", + // Reference: &file.Reference{ + // RealPath: "/path/to/file.txt", + // }, + // }, + // LeafLinkResolution: []file.ReferenceAccess{ + // { + // RequestPath: "/link-to-file", + // Reference: &file.Reference{RealPath: "/link-to-file"}, + // }, + // }, + //}, + { + ReferenceAccess: file.ReferenceAccess{ + RequestPath: "/link-to-path/to/file.txt", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", + }, + }, + }, + }, + }, { name: "path does not exists", fields: defaultFields, From e2c6d5dc1e69c2c9bb3b9334c6d94edcbc9f2680 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Wed, 1 Feb 2023 19:45:49 -0500 Subject: [PATCH 16/35] change file.Type to int + fix layer 0 squashed search context Signed-off-by: Alex Goodman --- pkg/file/type.go | 20 ++++++++++---------- pkg/filetree/filetree.go | 8 +++++--- pkg/filetree/search.go | 13 ++++++++----- pkg/image/image.go | 1 + 4 files changed, 24 insertions(+), 18 deletions(-) diff --git a/pkg/file/type.go b/pkg/file/type.go index 
4c6f6775..0d587a1b 100644 --- a/pkg/file/type.go +++ b/pkg/file/type.go @@ -6,20 +6,20 @@ import ( ) const ( - TypeReg Type = 'r' - TypeHardLink Type = 'h' - TypeSymlink Type = 'l' - TypeCharacterDevice Type = 'c' - TypeBlockDevice Type = 'b' - TypeDir Type = 'd' - TypeFifo Type = 'f' - TypeSocket Type = 's' - TypeIrregular Type = '?' + TypeReg Type = iota + TypeHardLink + TypeSymlink + TypeCharacterDevice + TypeBlockDevice + TypeDir + TypeFifo + TypeSocket + TypeIrregular ) // why use a rune type? we're looking for something that is memory compact but is easily human interpretable. -type Type rune +type Type int func AllTypes() []Type { return []Type{ diff --git a/pkg/filetree/filetree.go b/pkg/filetree/filetree.go index 555e736e..b1cbda87 100644 --- a/pkg/filetree/filetree.go +++ b/pkg/filetree/filetree.go @@ -6,6 +6,8 @@ import ( "path" "strings" + "github.com/scylladb/go-set/iset" + "github.com/anchore/stereoscope/internal" "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/stereoscope/pkg/filetree/filenode" @@ -109,15 +111,15 @@ func (t *FileTree) AllFiles(types ...file.Type) []file.Reference { types = []file.Type{file.TypeReg} } - typeSet := internal.NewStringSet() + typeSet := iset.New() for _, t := range types { - typeSet.Add(string(t)) + typeSet.Add(int(t)) } var files []file.Reference for _, n := range t.tree.Nodes() { f := n.(*filenode.FileNode) - if typeSet.Contains(string(f.FileType)) && f.Reference != nil { + if typeSet.Has(int(f.FileType)) && f.Reference != nil { files = append(files, *f.Reference) } } diff --git a/pkg/filetree/search.go b/pkg/filetree/search.go index eb1340b7..479aeaa7 100644 --- a/pkg/filetree/search.go +++ b/pkg/filetree/search.go @@ -124,7 +124,12 @@ func (sc searchContext) SearchByGlob(pattern string, options ...LinkResolutionOp if sc.index == nil { options = append(options, FollowBasenameLinks) - return sc.tree.FilesByGlob(pattern, options...) + refs, err := sc.tree.FilesByGlob(pattern, options...) 
+ if err != nil { + return nil, fmt.Errorf("unable to search by glob=%q: %w", pattern, err) + } + sort.Sort(file.ReferenceAccessVias(refs)) + return refs, nil } var allRefs []file.ReferenceAccessVia @@ -136,6 +141,8 @@ func (sc searchContext) SearchByGlob(pattern string, options ...LinkResolutionOp allRefs = append(allRefs, refs...) } + sort.Sort(file.ReferenceAccessVias(allRefs)) + return allRefs, nil } @@ -231,8 +238,6 @@ func (sc searchContext) searchByParentBasename(request searchRequest) ([]file.Re } } - sort.Sort(file.ReferenceAccessVias(results)) - return results, nil } @@ -257,8 +262,6 @@ func (sc searchContext) referencesWithRequirement(requirement string, entries [] } } - sort.Sort(file.ReferenceAccessVias(results)) - return results, nil } diff --git a/pkg/image/image.go b/pkg/image/image.go index 3e917adf..e7c3b3cf 100644 --- a/pkg/image/image.go +++ b/pkg/image/image.go @@ -232,6 +232,7 @@ func (i *Image) squash(prog *progress.Manual) error { if idx == 0 { lastSquashTree = layer.Tree layer.SquashedTree = layer.Tree + layer.SquashedSearchContext = filetree.NewSearchContext(layer.SquashedTree, layer.fileCatalog.Index) continue } From 5dbc9e9175ce7d38c8f3e36c8ec3aae25900034e Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Fri, 3 Feb 2023 10:21:51 -0500 Subject: [PATCH 17/35] more cleanup Signed-off-by: Alex Goodman --- pkg/file/metadata.go | 59 +++++++++++++++++-- pkg/file/metadata_test.go | 42 +++++++++++++ pkg/file/path_set.go | 6 +- pkg/file/reference.go | 4 +- .../link_to_link_to_new_readme | 1 + .../symlinks-simple/link_to_new_readme | 1 + pkg/file/test-fixtures/symlinks-simple/readme | 2 + 7 files changed, 106 insertions(+), 9 deletions(-) create mode 120000 pkg/file/test-fixtures/symlinks-simple/link_to_link_to_new_readme create mode 120000 pkg/file/test-fixtures/symlinks-simple/link_to_new_readme create mode 100644 pkg/file/test-fixtures/symlinks-simple/readme diff --git a/pkg/file/metadata.go b/pkg/file/metadata.go index 4eb93410..7ba0014f 
100644 --- a/pkg/file/metadata.go +++ b/pkg/file/metadata.go @@ -6,6 +6,9 @@ import ( "os" "path" "path/filepath" + "syscall" + + "github.com/anchore/stereoscope/internal/log" "github.com/sylabs/squashfs" ) @@ -17,10 +20,9 @@ type Metadata struct { // LinkDestination is populated only for hardlinks / symlinks, can be an absolute or relative LinkDestination string // Size of the file in bytes - Size int64 - UserID int - GroupID int - // Type is the tar.Type entry for the file + Size int64 + UserID int + GroupID int Type Type IsDir bool Mode os.FileMode @@ -87,3 +89,52 @@ func NewMetadataFromSquashFSFile(path string, f *squashfs.File) (Metadata, error return md, nil } + +func NewMetadataFromPath(path string, info os.FileInfo) Metadata { + var mimeType string + uid, gid := getXid(info) + + ty := TypeFromMode(info.Mode()) + + if ty == TypeReg { + f, err := os.Open(path) + if err != nil { + // TODO: it may be that the file is inaccessible, however, this is not an error or a warning. In the future we need to track these as known-unknowns + f = nil + } else { + defer func() { + if err := f.Close(); err != nil { + log.Warnf("unable to close file while obtaining metadata: %s", path) + } + }() + } + + mimeType = MIMEType(f) + } + + // TODO: should we clean up path to be the real, absolute path? 
+ + return Metadata{ + Path: path, + Mode: info.Mode(), + Type: ty, + // unsupported across platforms + UserID: uid, + GroupID: gid, + Size: info.Size(), + MIMEType: mimeType, + IsDir: info.IsDir(), + } +} + +// getXid is the UID GID system info for unix +func getXid(info os.FileInfo) (uid, gid int) { + uid = -1 + gid = -1 + if stat, ok := info.Sys().(*syscall.Stat_t); ok { + uid = int(stat.Uid) + gid = int(stat.Gid) + } + + return uid, gid +} diff --git a/pkg/file/metadata_test.go b/pkg/file/metadata_test.go index cf3d1ee2..4b69016d 100644 --- a/pkg/file/metadata_test.go +++ b/pkg/file/metadata_test.go @@ -4,6 +4,8 @@ package file import ( + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "io" "os" "strings" @@ -43,3 +45,43 @@ func TestFileMetadataFromTar(t *testing.T) { t.Errorf("diff: %s", d) } } + +func TestFileMetadataFromPath(t *testing.T) { + + tests := []struct { + path string + expectedType Type + expectedMIMEType string + }{ + { + path: "test-fixtures/symlinks-simple/readme", + expectedType: TypeReg, + expectedMIMEType: "text/plain", + }, + { + path: "test-fixtures/symlinks-simple/link_to_new_readme", + expectedType: TypeSymlink, + expectedMIMEType: "", + }, + { + path: "test-fixtures/symlinks-simple/link_to_link_to_new_readme", + expectedType: TypeSymlink, + expectedMIMEType: "", + }, + { + path: "test-fixtures/symlinks-simple", + expectedType: TypeDir, + expectedMIMEType: "", + }, + } + for _, test := range tests { + t.Run(test.path, func(t *testing.T) { + info, err := os.Lstat(test.path) + require.NoError(t, err) + + actual := NewMetadataFromPath(test.path, info) + assert.Equal(t, test.expectedMIMEType, actual.MIMEType, "unexpected MIME type for %s", test.path) + assert.Equal(t, test.expectedType, actual.Type, "unexpected type for %s", test.path) + }) + } +} diff --git a/pkg/file/path_set.go b/pkg/file/path_set.go index 97df52ac..2fbd58de 100644 --- a/pkg/file/path_set.go +++ b/pkg/file/path_set.go @@ -2,8 +2,10 @@ package 
file type PathSet map[Path]struct{} -func NewPathSet() PathSet { - return make(PathSet) +func NewPathSet(is ...Path) PathSet { + s := make(PathSet) + s.Add(is...) + return s } func (s PathSet) Size() int { diff --git a/pkg/file/reference.go b/pkg/file/reference.go index 27da47bc..1109227d 100644 --- a/pkg/file/reference.go +++ b/pkg/file/reference.go @@ -1,8 +1,6 @@ package file -import ( - "fmt" -) +import "fmt" // Reference represents a unique file. This is useful when path is not good enough (i.e. you have the same file path for two files in two different container image layers, and you need to be able to distinguish them apart) type Reference struct { diff --git a/pkg/file/test-fixtures/symlinks-simple/link_to_link_to_new_readme b/pkg/file/test-fixtures/symlinks-simple/link_to_link_to_new_readme new file mode 120000 index 00000000..e348d807 --- /dev/null +++ b/pkg/file/test-fixtures/symlinks-simple/link_to_link_to_new_readme @@ -0,0 +1 @@ +link_to_new_readme \ No newline at end of file diff --git a/pkg/file/test-fixtures/symlinks-simple/link_to_new_readme b/pkg/file/test-fixtures/symlinks-simple/link_to_new_readme new file mode 120000 index 00000000..ea786ff2 --- /dev/null +++ b/pkg/file/test-fixtures/symlinks-simple/link_to_new_readme @@ -0,0 +1 @@ +readme \ No newline at end of file diff --git a/pkg/file/test-fixtures/symlinks-simple/readme b/pkg/file/test-fixtures/symlinks-simple/readme new file mode 100644 index 00000000..df85b76a --- /dev/null +++ b/pkg/file/test-fixtures/symlinks-simple/readme @@ -0,0 +1,2 @@ +this directory exists for unit tests on irregular files. You can't see other files here because they are removed after each test. +This readme is a better version of Russell's teapot. 
From 033d3e4cc97730d10c5001cb58d538e9ac88cc3e Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Fri, 3 Feb 2023 11:28:15 -0500 Subject: [PATCH 18/35] switch to generic set implementation Signed-off-by: Alex Goodman --- go.mod | 11 +- go.sum | 19 ++-- internal/set.go | 85 +++++++++++++++ internal/set_test.go | 226 +++++++++++++++++++++++++++++++++++++++ internal/stringset.go | 38 ------- pkg/file/id.go | 65 ++--------- pkg/file/path_set.go | 60 +---------- pkg/filetree/filetree.go | 10 +- pkg/filetree/index.go | 8 +- pkg/tree/node/id.go | 56 +--------- 10 files changed, 353 insertions(+), 225 deletions(-) create mode 100644 internal/set.go create mode 100644 internal/set_test.go delete mode 100644 internal/stringset.go diff --git a/go.mod b/go.mod index 1aafe9ab..d2193f60 100644 --- a/go.mod +++ b/go.mod @@ -14,7 +14,7 @@ require ( github.com/docker/docker v20.10.12+incompatible github.com/gabriel-vasile/mimetype v1.4.0 github.com/go-test/deep v1.0.8 - github.com/google/go-cmp v0.5.6 + github.com/google/go-cmp v0.5.8 github.com/google/go-containerregistry v0.7.0 github.com/hashicorp/go-multierror v1.1.1 github.com/logrusorgru/aurora v0.0.0-20200102142835-e9ef32dff381 @@ -25,12 +25,13 @@ require ( github.com/scylladb/go-set v1.0.3-0.20200225121959-cc7b2070d91e github.com/sergi/go-diff v1.2.0 github.com/spf13/afero v1.6.0 - github.com/stretchr/testify v1.7.0 + github.com/stretchr/testify v1.8.1 github.com/sylabs/sif/v2 v2.8.1 github.com/sylabs/squashfs v0.6.1 github.com/wagoodman/go-partybus v0.0.0-20200526224238-eb215533f07d github.com/wagoodman/go-progress v0.0.0-20200621122631-1a2120f0695a golang.org/x/crypto v0.0.0-20220315160706-3147a52a75dd + golang.org/x/exp v0.0.0-20230202163644-54bba9f4231b ) require ( @@ -68,14 +69,14 @@ require ( github.com/pierrec/lz4/v4 v4.1.15 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/sirupsen/logrus v1.8.1 // indirect - github.com/stretchr/objx v0.2.0 // indirect + github.com/stretchr/objx v0.5.0 // 
indirect github.com/therootcompany/xz v1.0.1 // indirect github.com/ulikunitz/xz v0.5.10 // indirect github.com/vbatts/tar-split v0.11.2 // indirect golang.org/x/net v0.0.0-20220722155237-a158d28d115b // indirect golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8 // indirect golang.org/x/sync v0.0.0-20210220032951-036812b2e83c // indirect - golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f // indirect + golang.org/x/sys v0.1.0 // indirect golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 // indirect golang.org/x/text v0.3.7 // indirect google.golang.org/appengine v1.6.7 // indirect @@ -84,5 +85,3 @@ require ( google.golang.org/protobuf v1.28.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) - -require golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect diff --git a/go.sum b/go.sum index 365ddab7..c99f5087 100644 --- a/go.sum +++ b/go.sum @@ -430,8 +430,9 @@ github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg= +github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-containerregistry v0.7.0 h1:u0onUUOcyoCDHEiJoyR1R1gx5er1+r06V5DBhUU5ndk= github.com/google/go-containerregistry v0.7.0/go.mod h1:2zaoelrL0d08gGbpdP3LqyUuBmhWbpD6IOe2s9nLS2k= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= @@ -752,16 +753,21 @@ github.com/stefanberger/go-pkcs11uri v0.0.0-20201008174630-78d3cae3a980/go.mod h github.com/stretchr/objx 
v0.0.0-20180129172003-8a3f7159479f/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.2.0 h1:Hbg2NidpLE8veEBkEZTL3CvlkUIVzuU9jDplZO54c48= github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v0.0.0-20180303142811-b89eecf5ca5d/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw= github.com/sylabs/sif/v2 v2.8.1 h1:whr4Vz12RXfLnYyVGHoD/rD/hbF2g9OW7BJHa+WIqW8= github.com/sylabs/sif/v2 v2.8.1/go.mod 
h1:LQOdYXC9a8i7BleTKRw9lohi0rTbXkJOeS9u0ebvgyM= @@ -860,6 +866,8 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0 golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= +golang.org/x/exp v0.0.0-20230202163644-54bba9f4231b h1:EqBVA+nNsObCwQoBEHy4wLU0pi7i8a4AL3pbItPdPkE= +golang.org/x/exp v0.0.0-20230202163644-54bba9f4231b/go.mod h1:CxIveKay+FTh1D0yPZemJVgC/95VzuuOLq5Qi4xnoYc= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= @@ -1056,8 +1064,8 @@ golang.org/x/sys v0.0.0-20210823070655-63515b42dcdf/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20210908233432-aa78b53d3365/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211110154304-99a53858aa08/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f h1:v4INt8xihDGvnrfjMDVXGxw9wrfxYyCjk0KbXjhR55s= -golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U= +golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 h1:JGgROgKl9N8DuW20oFS5gxc+lE67/N3FcwmBPMe7ArY= 
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= @@ -1139,7 +1147,6 @@ golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/api v0.0.0-20160322025152-9bf6e6e569ff/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0= google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= diff --git a/internal/set.go b/internal/set.go new file mode 100644 index 00000000..e60db5ef --- /dev/null +++ b/internal/set.go @@ -0,0 +1,85 @@ +package internal + +import ( + "golang.org/x/exp/constraints" + "sort" +) + +type orderedComparable interface { + comparable + constraints.Ordered +} + +type OrderableSet[T orderedComparable] map[T]struct{} + +func NewOrderableSet[T orderedComparable](is ...T) OrderableSet[T] { + s := make(OrderableSet[T]) + s.Add(is...) + return s +} + +func NewStringSet(start ...string) OrderableSet[string] { + return NewOrderableSet(start...) 
+} + +func (s OrderableSet[T]) Size() int { + return len(s) +} + +func (s OrderableSet[T]) Merge(other OrderableSet[T]) { + for _, i := range other.List() { + s.Add(i) + } +} + +func (s OrderableSet[T]) Add(ids ...T) { + for _, i := range ids { + s[i] = struct{}{} + } +} + +func (s OrderableSet[T]) Remove(ids ...T) { + for _, i := range ids { + delete(s, i) + } +} + +func (s OrderableSet[T]) Contains(i T) bool { + _, ok := s[i] + return ok +} + +func (s OrderableSet[T]) Clear() { + // TODO: replace this with the new 'clear' keyword when it's available in go 1.20 or 1.21 + for i := range s { + delete(s, i) + } +} + +func (s OrderableSet[T]) List() []T { + ret := make([]T, 0, len(s)) + for i := range s { + ret = append(ret, i) + } + return ret +} + +func (s OrderableSet[T]) Sorted() []T { + ids := s.List() + + sort.Slice(ids, func(i, j int) bool { + return ids[i] < ids[j] + }) + + return ids +} + +func (s OrderableSet[T]) ContainsAny(ids ...T) bool { + for _, i := range ids { + _, ok := s[i] + if ok { + return true + } + } + return false +} diff --git a/internal/set_test.go b/internal/set_test.go new file mode 100644 index 00000000..783c83af --- /dev/null +++ b/internal/set_test.go @@ -0,0 +1,226 @@ +package internal + +import ( + "fmt" + "github.com/stretchr/testify/assert" + "testing" +) + +func TestOrderableSet_Size(t *testing.T) { + type testCase[T orderedComparable] struct { + name string + s OrderableSet[T] + want int + } + tests := []testCase[string]{ + { + name: "empty set", + s: NewOrderableSet[string](), + want: 0, + }, + { + name: "non-empty set", + s: NewOrderableSet[string]("items", "in", "set"), + want: 3, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := tt.s.Size(); got != tt.want { + t.Errorf("Size() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestOrderableSet_Add(t *testing.T) { + type args[T orderedComparable] struct { + ids []T + } + type testCase[T orderedComparable] struct { + name string + s 
OrderableSet[T] + args args[T] + } + tests := []testCase[string]{ + { + name: "add multiple", + s: NewOrderableSet[string](), + args: args[string]{ids: []string{"a", "b", "c"}}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tt.s.Add(tt.args.ids...) + for _, id := range tt.args.ids { + if !tt.s.Contains(id) { + t.Errorf("expected set to contain %q", id) + } + } + }) + } +} + +func TestOrderableSet_Remove(t *testing.T) { + type args[T orderedComparable] struct { + ids []T + } + type testCase[T orderedComparable] struct { + name string + s OrderableSet[T] + args args[T] + expected []T + } + tests := []testCase[string]{ + { + name: "remove multiple", + s: NewOrderableSet[string]("a", "b", "c"), + args: args[string]{ids: []string{"a", "b"}}, + expected: []string{"c"}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tt.s.Remove(tt.args.ids...) + for _, id := range tt.args.ids { + if tt.s.Contains(id) { + t.Errorf("expected set to NOT contain %q", id) + } + } + for _, id := range tt.expected { + if !tt.s.Contains(id) { + t.Errorf("expected set to contain %q", id) + } + } + }) + } +} + +func TestOrderableSet_Contains(t *testing.T) { + type args[T orderedComparable] struct { + i T + } + type testCase[T orderedComparable] struct { + name string + s OrderableSet[T] + args args[T] + want bool + } + tests := []testCase[string]{ + { + name: "contains", + s: NewOrderableSet[string]("a", "b", "c"), + args: args[string]{i: "a"}, + want: true, + }, + { + name: "not contains", + s: NewOrderableSet[string]("a", "b", "c"), + args: args[string]{i: "x"}, + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := tt.s.Contains(tt.args.i); got != tt.want { + t.Errorf("Contains() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestOrderableSet_Clear(t *testing.T) { + type testCase[T orderedComparable] struct { + name string + s OrderableSet[T] + } + tests := []testCase[string]{ + 
{ + name: "go case", + s: NewOrderableSet[string]("a", "b", "c"), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tt.s.Clear() + assert.Equal(t, 0, tt.s.Size()) + }) + } +} + +func TestOrderableSet_List(t *testing.T) { + type testCase[T orderedComparable] struct { + name string + s OrderableSet[T] + want []T + } + tests := []testCase[string]{ + { + name: "go case", + s: NewOrderableSet[string]("a", "b", "c"), + want: []string{"a", "b", "c"}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.ElementsMatchf(t, tt.want, tt.s.List(), "List()") + }) + } +} + +func TestOrderableSet_Sorted(t *testing.T) { + type testCase[T orderedComparable] struct { + name string + s OrderableSet[T] + want []T + } + tests := []testCase[string]{ + { + name: "go case", + s: NewOrderableSet[string]("a", "b", "c"), + want: []string{"a", "b", "c"}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equalf(t, tt.want, tt.s.Sorted(), "Sorted()") + }) + } +} + +func TestOrderableSet_ContainsAny(t *testing.T) { + type args[T orderedComparable] struct { + ids []T + } + type testCase[T orderedComparable] struct { + name string + s OrderableSet[T] + args args[T] + want bool + } + tests := []testCase[string]{ + { + name: "contains one", + s: NewOrderableSet[string]("a", "b", "c"), + args: args[string]{ids: []string{"a", "x"}}, + want: true, + }, + { + name: "contains all", + s: NewOrderableSet[string]("a", "b", "c"), + args: args[string]{ids: []string{"a", "b"}}, + want: true, + }, + { + name: "contains none", + s: NewOrderableSet[string]("a", "b", "c"), + args: args[string]{ids: []string{"x", "y"}}, + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, tt.s.ContainsAny(tt.args.ids...), fmt.Sprintf("ContainsAny(%v)", tt.args.ids)) + }) + } +} diff --git a/internal/stringset.go b/internal/stringset.go deleted file mode 100644 index 
327312b0..00000000 --- a/internal/stringset.go +++ /dev/null @@ -1,38 +0,0 @@ -package internal - -import "sort" - -type Set map[string]struct{} - -func NewStringSet(start ...string) Set { - ret := make(Set) - for _, s := range start { - ret.Add(s) - } - return ret -} - -func (s Set) Add(i string) { - s[i] = struct{}{} -} - -func (s Set) Remove(i string) { - delete(s, i) -} - -func (s Set) Contains(i string) bool { - _, ok := s[i] - return ok -} - -// ToSlice returns a sorted slice of strings that are contained within the set. -func (s Set) ToSlice() []string { - ret := make([]string, len(s)) - idx := 0 - for v := range s { - ret[idx] = v - idx++ - } - sort.Strings(ret) - return ret -} diff --git a/pkg/file/id.go b/pkg/file/id.go index 4f978533..53152894 100644 --- a/pkg/file/id.go +++ b/pkg/file/id.go @@ -1,6 +1,8 @@ package file -import "sort" +import ( + "github.com/anchore/stereoscope/internal" +) var nextID = 0 // note: this is governed by the reference constructor @@ -21,63 +23,8 @@ func (ids IDs) Swap(i, j int) { ids[i], ids[j] = ids[j], ids[i] } -type IDSet map[ID]struct{} +type IDSet = internal.OrderableSet[ID] -func NewIDSet() IDSet { - return make(IDSet) -} - -func (s IDSet) Size() int { - return len(s) -} - -func (s IDSet) Merge(other IDSet) { - for _, i := range other.List() { - s.Add(i) - } -} - -func (s IDSet) Add(ids ...ID) { - for _, i := range ids { - s[i] = struct{}{} - } -} - -func (s IDSet) Remove(ids ...ID) { - for _, i := range ids { - delete(s, i) - } -} - -func (s IDSet) Contains(i ID) bool { - _, ok := s[i] - return ok -} - -func (s IDSet) Clear() { - // TODO: replace this with the new 'clear' keyword when it's available in go 1.20 or 1.21 - for i := range s { - delete(s, i) - } -} - -func (s IDSet) List() []ID { - ret := make([]ID, 0, len(s)) - for i := range s { - ret = append(ret, i) - } - - sort.Sort(IDs(ret)) - - return ret -} - -func (s IDSet) ContainsAny(ids ...ID) bool { - for _, i := range ids { - _, ok := s[i] - if ok { - return 
true - } - } - return false +func NewIDSet(ids ...ID) IDSet { + return internal.NewOrderableSet(ids...) } diff --git a/pkg/file/path_set.go b/pkg/file/path_set.go index 2fbd58de..b42cb535 100644 --- a/pkg/file/path_set.go +++ b/pkg/file/path_set.go @@ -1,61 +1,9 @@ package file -type PathSet map[Path]struct{} +import "github.com/anchore/stereoscope/internal" -func NewPathSet(is ...Path) PathSet { - s := make(PathSet) - s.Add(is...) - return s -} - -func (s PathSet) Size() int { - return len(s) -} - -func (s PathSet) Merge(other PathSet) { - for _, i := range other.List() { - s.Add(i) - } -} - -func (s PathSet) Add(ids ...Path) { - for _, i := range ids { - s[i] = struct{}{} - } -} - -func (s PathSet) Remove(ids ...Path) { - for _, i := range ids { - delete(s, i) - } -} - -func (s PathSet) Contains(i Path) bool { - _, ok := s[i] - return ok -} - -func (s PathSet) Clear() { - // TODO: replace this with the new 'clear' keyword when it's available in go 1.20 or 1.21 - for i := range s { - delete(s, i) - } -} - -func (s PathSet) List() []Path { - ret := make([]Path, 0, len(s)) - for i := range s { - ret = append(ret, i) - } - return ret -} +type PathSet = internal.OrderableSet[Path] -func (s PathSet) ContainsAny(ids ...Path) bool { - for _, i := range ids { - _, ok := s[i] - if ok { - return true - } - } - return false +func NewPathSet(paths ...Path) PathSet { + return internal.NewOrderableSet(paths...) } diff --git a/pkg/filetree/filetree.go b/pkg/filetree/filetree.go index b1cbda87..93d4e13d 100644 --- a/pkg/filetree/filetree.go +++ b/pkg/filetree/filetree.go @@ -304,7 +304,7 @@ func (t *FileTree) node(p file.Path, strategy linkResolutionStrategy) (*nodeAcce // return FileNode of the basename in the given path (no resolution is done at or past the basename). Note: it is // assumed that the given path has already been normalized. 
-func (t *FileTree) resolveAncestorLinks(path file.Path, attemptedPaths internal.Set) (*nodeAccess, error) { +func (t *FileTree) resolveAncestorLinks(path file.Path, attemptedPaths file.PathSet) (*nodeAccess, error) { // performance optimization... see if there is a node at the path (as if it is a real path). If so, // use it, otherwise, continue with ancestor resolution currentNodeAccess, err := t.node(path, linkResolutionStrategy{}) @@ -379,14 +379,14 @@ func (t *FileTree) resolveAncestorLinks(path file.Path, attemptedPaths internal. // resolveNodeLinks takes the given FileNode and resolves all links at the base of the real path for the node (this implies // that NO ancestors are considered). // nolint: funlen -func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, attemptedPaths internal.Set) (*nodeAccess, error) { +func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, attemptedPaths file.PathSet) (*nodeAccess, error) { if n == nil { return nil, fmt.Errorf("cannot resolve links with nil Node given") } // we need to short-circuit link resolution that never resolves (cycles) due to a cycle referencing nodes that do not exist if attemptedPaths == nil { - attemptedPaths = internal.NewStringSet() + attemptedPaths = file.NewPathSet() } // note: this assumes that callers are passing paths in which the constituent parts are NOT symlinks @@ -435,12 +435,12 @@ func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, lastNode = currentNodeAccess // break any cycles with non-existent paths (before attempting to look the path up again) - if attemptedPaths.Contains(string(nextPath)) { + if attemptedPaths.Contains(nextPath) { return nil, ErrLinkCycleDetected } // get the next Node (based on the next path) - attemptedPaths.Add(string(nextPath)) + attemptedPaths.Add(nextPath) currentNodeAccess, err = t.resolveAncestorLinks(nextPath, attemptedPaths) if err != nil { if currentNodeAccess != nil { diff 
--git a/pkg/filetree/index.go b/pkg/filetree/index.go index e5e7369e..a7433d48 100644 --- a/pkg/filetree/index.go +++ b/pkg/filetree/index.go @@ -156,7 +156,7 @@ func (c *index) GetByFileType(fTypes ...file.Type) ([]IndexEntry, error) { continue } - for _, id := range fileIDs.List() { + for _, id := range fileIDs.Sorted() { entry, ok := c.index[id] if !ok { return nil, os.ErrNotExist @@ -180,7 +180,7 @@ func (c *index) GetByMIMEType(mTypes ...string) ([]IndexEntry, error) { continue } - for _, id := range fileIDs.List() { + for _, id := range fileIDs.Sorted() { entry, ok := c.index[id] if !ok { return nil, os.ErrNotExist @@ -204,7 +204,7 @@ func (c *index) GetByExtension(extensions ...string) ([]IndexEntry, error) { continue } - for _, id := range fileIDs.List() { + for _, id := range fileIDs.Sorted() { entry, ok := c.index[id] if !ok { return nil, os.ErrNotExist @@ -232,7 +232,7 @@ func (c *index) GetByBasename(basenames ...string) ([]IndexEntry, error) { continue } - for _, id := range fileIDs.List() { + for _, id := range fileIDs.Sorted() { entry, ok := c.index[id] if !ok { return nil, os.ErrNotExist diff --git a/pkg/tree/node/id.go b/pkg/tree/node/id.go index 4509eabe..7a47790e 100644 --- a/pkg/tree/node/id.go +++ b/pkg/tree/node/id.go @@ -1,57 +1,11 @@ package node -type ID string - -type IDSet map[ID]struct{} - -func NewIDSet() IDSet { - return make(IDSet) -} - -func (s IDSet) Merge(other IDSet) { - for _, i := range other.List() { - s.Add(i) - } -} - -func (s IDSet) Add(ids ...ID) { - for _, i := range ids { - s[i] = struct{}{} - } -} +import "github.com/anchore/stereoscope/internal" -func (s IDSet) Remove(ids ...ID) { - for _, i := range ids { - delete(s, i) - } -} - -func (s IDSet) Contains(i ID) bool { - _, ok := s[i] - return ok -} - -func (s IDSet) Clear() { - // TODO: replace this with the new 'clear' keyword when it's available in go 1.20 or 1.21 - for i := range s { - delete(s, i) - } -} +type ID string -func (s IDSet) List() []ID { - ret := 
make([]ID, 0, len(s)) - for i := range s { - ret = append(ret, i) - } - return ret -} +type IDSet = internal.OrderableSet[ID] -func (s IDSet) ContainsAny(ids ...ID) bool { - for _, i := range ids { - _, ok := s[i] - if ok { - return true - } - } - return false +func NewIDSet(ids ...ID) IDSet { + return internal.NewOrderableSet(ids...) } From d19e1be7f9298a3f5f47f8b7255342a848b13ca1 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Fri, 3 Feb 2023 12:18:29 -0500 Subject: [PATCH 19/35] update linter Signed-off-by: Alex Goodman --- Makefile | 2 +- internal/set.go | 3 ++- pkg/file/type.go | 2 +- pkg/filetree/depth_first_path_walker.go | 2 +- pkg/filetree/filetree.go | 2 +- pkg/filetree/search.go | 2 +- 6 files changed, 7 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 7bdbc175..181594bb 100644 --- a/Makefile +++ b/Makefile @@ -60,7 +60,7 @@ bootstrap: $(RESULTSDIR) ## Download and install all project dependencies (+ pre go mod download # install utilities [ -f "$(TEMPDIR)/benchstat" ] || GO111MODULE=off GOBIN=$(shell realpath $(TEMPDIR)) go get -u golang.org/x/perf/cmd/benchstat - [ -f "$(TEMPDIR)/golangci" ] || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(TEMPDIR)/ v1.50.1 + [ -f "$(TEMPDIR)/golangci" ] || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(TEMPDIR)/ v1.51.0 [ -f "$(TEMPDIR)/bouncer" ] || curl -sSfL https://raw.githubusercontent.com/wagoodman/go-bouncer/master/bouncer.sh | sh -s -- -b $(TEMPDIR)/ v0.4.0 .PHONY: static-analysis diff --git a/internal/set.go b/internal/set.go index e60db5ef..ad7be54b 100644 --- a/internal/set.go +++ b/internal/set.go @@ -1,8 +1,9 @@ package internal import ( - "golang.org/x/exp/constraints" "sort" + + "golang.org/x/exp/constraints" ) type orderedComparable interface { diff --git a/pkg/file/type.go b/pkg/file/type.go index 0d587a1b..817a22c0 100644 --- a/pkg/file/type.go +++ b/pkg/file/type.go @@ 
-37,7 +37,7 @@ func AllTypes() []Type { func TypeFromTarType(ty byte) Type { switch ty { - case tar.TypeReg, tar.TypeRegA: + case tar.TypeReg, tar.TypeRegA: //nolint: staticcheck return TypeReg case tar.TypeLink: return TypeHardLink diff --git a/pkg/filetree/depth_first_path_walker.go b/pkg/filetree/depth_first_path_walker.go index 586aa07b..061bf84a 100644 --- a/pkg/filetree/depth_first_path_walker.go +++ b/pkg/filetree/depth_first_path_walker.go @@ -54,7 +54,7 @@ func NewDepthFirstPathWalker(tree *FileTree, visitor FileNodeVisitor, conditions return w } -// nolint:gocognit +//nolint:gocognit func (w *DepthFirstPathWalker) Walk(from file.Path) (file.Path, *filenode.FileNode, error) { w.pathStack.Push(from) diff --git a/pkg/filetree/filetree.go b/pkg/filetree/filetree.go index 93d4e13d..da2ced3a 100644 --- a/pkg/filetree/filetree.go +++ b/pkg/filetree/filetree.go @@ -378,7 +378,7 @@ func (t *FileTree) resolveAncestorLinks(path file.Path, attemptedPaths file.Path // resolveNodeLinks takes the given FileNode and resolves all links at the base of the real path for the node (this implies // that NO ancestors are considered). 
-// nolint: funlen +//nolint: funlen func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, attemptedPaths file.PathSet) (*nodeAccess, error) { if n == nil { return nil, fmt.Errorf("cannot resolve links with nil Node given") diff --git a/pkg/filetree/search.go b/pkg/filetree/search.go index 479aeaa7..f3521299 100644 --- a/pkg/filetree/search.go +++ b/pkg/filetree/search.go @@ -329,7 +329,7 @@ func (sc searchContext) pathsToNode(fn *filenode.FileNode, observedPaths file.Pa return paths, err } -// nolint: funlen +//nolint: funlen func (sc searchContext) _pathsToNode(fn *filenode.FileNode, observedPaths file.PathSet, cache map[cacheRequest]cacheResult) (file.PathSet, error) { if fn == nil { return nil, nil From 561c28639c15e491ad368ab7ee219de20f271469 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Fri, 3 Feb 2023 13:15:26 -0500 Subject: [PATCH 20/35] replace generic set implemetation with plain set (unstable in go1.19) Signed-off-by: Alex Goodman --- .golangci.yaml | 23 +- go.mod | 1 - go.sum | 2 - internal/set.go | 86 ------- internal/string_set.go | 76 +++++++ internal/{set_test.go => string_set_test.go} | 132 +++++------ pkg/file/id.go | 10 - pkg/file/id_set.go | 75 ++++++ pkg/file/id_set_test.go | 226 +++++++++++++++++++ pkg/file/path_set.go | 76 ++++++- pkg/file/path_set_test.go | 226 +++++++++++++++++++ pkg/file/type.go | 2 +- pkg/filetree/filetree.go | 2 +- pkg/filetree/search.go | 2 +- pkg/tree/node/id.go | 73 +++++- pkg/tree/node/id_test.go | 226 +++++++++++++++++++ 16 files changed, 1052 insertions(+), 186 deletions(-) delete mode 100644 internal/set.go create mode 100644 internal/string_set.go rename internal/{set_test.go => string_set_test.go} (50%) create mode 100644 pkg/file/id_set.go create mode 100644 pkg/file/id_set_test.go create mode 100644 pkg/file/path_set_test.go create mode 100644 pkg/tree/node/id_test.go diff --git a/.golangci.yaml b/.golangci.yaml index 1cf5f179..a92c5c43 100644 --- a/.golangci.yaml +++ 
b/.golangci.yaml @@ -26,7 +26,6 @@ linters: - ineffassign - misspell - nakedret - - nolintlint - revive - staticcheck - stylecheck @@ -37,19 +36,23 @@ linters: - whitespace # do not enable... +# - deadcode # The owner seems to have abandoned the linter. Replaced by "unused". # - gochecknoglobals # - gochecknoinits # this is too aggressive # - godot # - godox # - goerr113 -# - golint # deprecated -# - gomnd # this is too aggressive -# - interfacer # this is a good idea, but is no longer supported and is prone to false positives -# - lll # without a way to specify per-line exception cases, this is not usable -# - maligned # this is an excellent linter, but tricky to optimize and we are not sensitive to memory layout optimizations +# - golint # deprecated +# - gomnd # this is too aggressive +# - interfacer # this is a good idea, but is no longer supported and is prone to false positives +# - lll # without a way to specify per-line exception cases, this is not usable +# - maligned # this is an excellent linter, but tricky to optimize and we are not sensitive to memory layout optimizations # - nestif -# - prealloc # following this rule isn't consistently a good idea, as it sometimes forces unnecessary allocations that result in less idiomatic code -# - scopelint # deprecated +# - nolintlint # as of go1.19 this conflicts with the behavior of gofmt, which is a deal-breaker (lint-fix will still fail when running lint) +# - prealloc # following this rule isn't consistently a good idea, as it sometimes forces unnecessary allocations that result in less idiomatic code +# - rowserrcheck # not in a repo with sql, so this is not useful +# - scopelint # deprecated +# - structcheck # The owner seems to have abandoned the linter. Replaced by "unused". # - testpackage -# - wsl # this doens't have an auto-fixer yet and is pretty noisy (https://github.com/bombsimon/wsl/issues/90) - +# - varcheck # The owner seems to have abandoned the linter. Replaced by "unused". 
+# - wsl # this doens't have an auto-fixer yet and is pretty noisy (https://github.com/bombsimon/wsl/issues/90) diff --git a/go.mod b/go.mod index d2193f60..9cec3854 100644 --- a/go.mod +++ b/go.mod @@ -31,7 +31,6 @@ require ( github.com/wagoodman/go-partybus v0.0.0-20200526224238-eb215533f07d github.com/wagoodman/go-progress v0.0.0-20200621122631-1a2120f0695a golang.org/x/crypto v0.0.0-20220315160706-3147a52a75dd - golang.org/x/exp v0.0.0-20230202163644-54bba9f4231b ) require ( diff --git a/go.sum b/go.sum index c99f5087..33eeee5a 100644 --- a/go.sum +++ b/go.sum @@ -866,8 +866,6 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0 golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= -golang.org/x/exp v0.0.0-20230202163644-54bba9f4231b h1:EqBVA+nNsObCwQoBEHy4wLU0pi7i8a4AL3pbItPdPkE= -golang.org/x/exp v0.0.0-20230202163644-54bba9f4231b/go.mod h1:CxIveKay+FTh1D0yPZemJVgC/95VzuuOLq5Qi4xnoYc= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= diff --git a/internal/set.go b/internal/set.go deleted file mode 100644 index ad7be54b..00000000 --- a/internal/set.go +++ /dev/null @@ -1,86 +0,0 @@ -package internal - -import ( - "sort" - - "golang.org/x/exp/constraints" -) - -type orderedComparable interface { - comparable - constraints.Ordered -} - -type OrderableSet[T orderedComparable] map[T]struct{} - -func NewOrderableSet[T orderedComparable](is ...T) OrderableSet[T] { - s := make(OrderableSet[T]) - 
s.Add(is...) - return s -} - -func NewStringSet(start ...string) OrderableSet[string] { - return NewOrderableSet(start...) -} - -func (s OrderableSet[T]) Size() int { - return len(s) -} - -func (s OrderableSet[T]) Merge(other OrderableSet[T]) { - for _, i := range other.List() { - s.Add(i) - } -} - -func (s OrderableSet[T]) Add(ids ...T) { - for _, i := range ids { - s[i] = struct{}{} - } -} - -func (s OrderableSet[T]) Remove(ids ...T) { - for _, i := range ids { - delete(s, i) - } -} - -func (s OrderableSet[T]) Contains(i T) bool { - _, ok := s[i] - return ok -} - -func (s OrderableSet[T]) Clear() { - // TODO: replace this with the new 'clear' keyword when it's available in go 1.20 or 1.21 - for i := range s { - delete(s, i) - } -} - -func (s OrderableSet[T]) List() []T { - ret := make([]T, 0, len(s)) - for i := range s { - ret = append(ret, i) - } - return ret -} - -func (s OrderableSet[T]) Sorted() []T { - ids := s.List() - - sort.Slice(ids, func(i, j int) bool { - return ids[i] < ids[j] - }) - - return ids -} - -func (s OrderableSet[T]) ContainsAny(ids ...T) bool { - for _, i := range ids { - _, ok := s[i] - if ok { - return true - } - } - return false -} diff --git a/internal/string_set.go b/internal/string_set.go new file mode 100644 index 00000000..42f00165 --- /dev/null +++ b/internal/string_set.go @@ -0,0 +1,76 @@ +package internal + +import ( + "sort" +) + +type StringSet map[string]struct{} + +func NewStringSet(is ...string) StringSet { + // TODO: replace with single generic implementation that also incorporates other set implementations + s := make(StringSet) + s.Add(is...) 
+ return s +} + +func (s StringSet) Size() int { + return len(s) +} + +func (s StringSet) Merge(other StringSet) { + for _, i := range other.List() { + s.Add(i) + } +} + +func (s StringSet) Add(ids ...string) { + for _, i := range ids { + s[i] = struct{}{} + } +} + +func (s StringSet) Remove(ids ...string) { + for _, i := range ids { + delete(s, i) + } +} + +func (s StringSet) Contains(i string) bool { + _, ok := s[i] + return ok +} + +func (s StringSet) Clear() { + // TODO: replace this with the new 'clear' keyword when it's available in go 1.20 or 1.21 + for i := range s { + delete(s, i) + } +} + +func (s StringSet) List() []string { + ret := make([]string, 0, len(s)) + for i := range s { + ret = append(ret, i) + } + return ret +} + +func (s StringSet) Sorted() []string { + ids := s.List() + + sort.Slice(ids, func(i, j int) bool { + return ids[i] < ids[j] + }) + + return ids +} + +func (s StringSet) ContainsAny(ids ...string) bool { + for _, i := range ids { + _, ok := s[i] + if ok { + return true + } + } + return false +} diff --git a/internal/set_test.go b/internal/string_set_test.go similarity index 50% rename from internal/set_test.go rename to internal/string_set_test.go index 783c83af..e04727ae 100644 --- a/internal/set_test.go +++ b/internal/string_set_test.go @@ -6,21 +6,21 @@ import ( "testing" ) -func TestOrderableSet_Size(t *testing.T) { - type testCase[T orderedComparable] struct { +func TestStringSet_Size(t *testing.T) { + type testCase struct { name string - s OrderableSet[T] + s StringSet want int } - tests := []testCase[string]{ + tests := []testCase{ { name: "empty set", - s: NewOrderableSet[string](), + s: NewStringSet(), want: 0, }, { name: "non-empty set", - s: NewOrderableSet[string]("items", "in", "set"), + s: NewStringSet("items", "in", "set"), want: 3, }, } @@ -33,20 +33,20 @@ func TestOrderableSet_Size(t *testing.T) { } } -func TestOrderableSet_Add(t *testing.T) { - type args[T orderedComparable] struct { - ids []T +func 
TestStringSet_Add(t *testing.T) { + type args struct { + ids []string } - type testCase[T orderedComparable] struct { + type testCase struct { name string - s OrderableSet[T] - args args[T] + s StringSet + args args } - tests := []testCase[string]{ + tests := []testCase{ { name: "add multiple", - s: NewOrderableSet[string](), - args: args[string]{ids: []string{"a", "b", "c"}}, + s: NewStringSet(), + args: args{ids: []string{"a", "b", "c"}}, }, } for _, tt := range tests { @@ -61,21 +61,21 @@ func TestOrderableSet_Add(t *testing.T) { } } -func TestOrderableSet_Remove(t *testing.T) { - type args[T orderedComparable] struct { - ids []T +func TestStringSet_Remove(t *testing.T) { + type args struct { + ids []string } - type testCase[T orderedComparable] struct { + type testCase struct { name string - s OrderableSet[T] - args args[T] - expected []T + s StringSet + args args + expected []string } - tests := []testCase[string]{ + tests := []testCase{ { name: "remove multiple", - s: NewOrderableSet[string]("a", "b", "c"), - args: args[string]{ids: []string{"a", "b"}}, + s: NewStringSet("a", "b", "c"), + args: args{ids: []string{"a", "b"}}, expected: []string{"c"}, }, } @@ -96,27 +96,27 @@ func TestOrderableSet_Remove(t *testing.T) { } } -func TestOrderableSet_Contains(t *testing.T) { - type args[T orderedComparable] struct { - i T +func TestStringSet_Contains(t *testing.T) { + type args struct { + i string } - type testCase[T orderedComparable] struct { + type testCase struct { name string - s OrderableSet[T] - args args[T] + s StringSet + args args want bool } - tests := []testCase[string]{ + tests := []testCase{ { name: "contains", - s: NewOrderableSet[string]("a", "b", "c"), - args: args[string]{i: "a"}, + s: NewStringSet("a", "b", "c"), + args: args{i: "a"}, want: true, }, { name: "not contains", - s: NewOrderableSet[string]("a", "b", "c"), - args: args[string]{i: "x"}, + s: NewStringSet("a", "b", "c"), + args: args{i: "x"}, want: false, }, } @@ -129,15 +129,15 @@ func 
TestOrderableSet_Contains(t *testing.T) { } } -func TestOrderableSet_Clear(t *testing.T) { - type testCase[T orderedComparable] struct { +func TestStringSet_Clear(t *testing.T) { + type testCase struct { name string - s OrderableSet[T] + s StringSet } - tests := []testCase[string]{ + tests := []testCase{ { name: "go case", - s: NewOrderableSet[string]("a", "b", "c"), + s: NewStringSet("a", "b", "c"), }, } for _, tt := range tests { @@ -148,16 +148,16 @@ func TestOrderableSet_Clear(t *testing.T) { } } -func TestOrderableSet_List(t *testing.T) { - type testCase[T orderedComparable] struct { +func TestStringSet_List(t *testing.T) { + type testCase struct { name string - s OrderableSet[T] - want []T + s StringSet + want []string } - tests := []testCase[string]{ + tests := []testCase{ { name: "go case", - s: NewOrderableSet[string]("a", "b", "c"), + s: NewStringSet("a", "b", "c"), want: []string{"a", "b", "c"}, }, } @@ -168,16 +168,16 @@ func TestOrderableSet_List(t *testing.T) { } } -func TestOrderableSet_Sorted(t *testing.T) { - type testCase[T orderedComparable] struct { +func TestStringSet_Sorted(t *testing.T) { + type testCase struct { name string - s OrderableSet[T] - want []T + s StringSet + want []string } - tests := []testCase[string]{ + tests := []testCase{ { name: "go case", - s: NewOrderableSet[string]("a", "b", "c"), + s: NewStringSet("a", "b", "c"), want: []string{"a", "b", "c"}, }, } @@ -188,33 +188,33 @@ func TestOrderableSet_Sorted(t *testing.T) { } } -func TestOrderableSet_ContainsAny(t *testing.T) { - type args[T orderedComparable] struct { - ids []T +func TestStringSet_ContainsAny(t *testing.T) { + type args struct { + ids []string } - type testCase[T orderedComparable] struct { + type testCase struct { name string - s OrderableSet[T] - args args[T] + s StringSet + args args want bool } - tests := []testCase[string]{ + tests := []testCase{ { name: "contains one", - s: NewOrderableSet[string]("a", "b", "c"), - args: args[string]{ids: []string{"a", 
"x"}}, + s: NewStringSet("a", "b", "c"), + args: args{ids: []string{"a", "x"}}, want: true, }, { name: "contains all", - s: NewOrderableSet[string]("a", "b", "c"), - args: args[string]{ids: []string{"a", "b"}}, + s: NewStringSet("a", "b", "c"), + args: args{ids: []string{"a", "b"}}, want: true, }, { name: "contains none", - s: NewOrderableSet[string]("a", "b", "c"), - args: args[string]{ids: []string{"x", "y"}}, + s: NewStringSet("a", "b", "c"), + args: args{ids: []string{"x", "y"}}, want: false, }, } diff --git a/pkg/file/id.go b/pkg/file/id.go index 53152894..75f05dc5 100644 --- a/pkg/file/id.go +++ b/pkg/file/id.go @@ -1,9 +1,5 @@ package file -import ( - "github.com/anchore/stereoscope/internal" -) - var nextID = 0 // note: this is governed by the reference constructor // ID is used for file tree manipulation to uniquely identify tree nodes. @@ -22,9 +18,3 @@ func (ids IDs) Less(i, j int) bool { func (ids IDs) Swap(i, j int) { ids[i], ids[j] = ids[j], ids[i] } - -type IDSet = internal.OrderableSet[ID] - -func NewIDSet(ids ...ID) IDSet { - return internal.NewOrderableSet(ids...) -} diff --git a/pkg/file/id_set.go b/pkg/file/id_set.go new file mode 100644 index 00000000..eebe00fa --- /dev/null +++ b/pkg/file/id_set.go @@ -0,0 +1,75 @@ +//nolint:dupl +package file + +import "sort" + +type IDSet map[ID]struct{} + +func NewIDSet(is ...ID) IDSet { + // TODO: replace with single generic implementation that also incorporates other set implementations + s := make(IDSet) + s.Add(is...) 
+ return s +} + +func (s IDSet) Size() int { + return len(s) +} + +func (s IDSet) Merge(other IDSet) { + for _, i := range other.List() { + s.Add(i) + } +} + +func (s IDSet) Add(ids ...ID) { + for _, i := range ids { + s[i] = struct{}{} + } +} + +func (s IDSet) Remove(ids ...ID) { + for _, i := range ids { + delete(s, i) + } +} + +func (s IDSet) Contains(i ID) bool { + _, ok := s[i] + return ok +} + +func (s IDSet) Clear() { + // TODO: replace this with the new 'clear' keyword when it's available in go 1.20 or 1.21 + for i := range s { + delete(s, i) + } +} + +func (s IDSet) List() []ID { + ret := make([]ID, 0, len(s)) + for i := range s { + ret = append(ret, i) + } + return ret +} + +func (s IDSet) Sorted() []ID { + ids := s.List() + + sort.Slice(ids, func(i, j int) bool { + return ids[i] < ids[j] + }) + + return ids +} + +func (s IDSet) ContainsAny(ids ...ID) bool { + for _, i := range ids { + _, ok := s[i] + if ok { + return true + } + } + return false +} diff --git a/pkg/file/id_set_test.go b/pkg/file/id_set_test.go new file mode 100644 index 00000000..b0d146db --- /dev/null +++ b/pkg/file/id_set_test.go @@ -0,0 +1,226 @@ +package file + +import ( + "fmt" + "github.com/stretchr/testify/assert" + "testing" +) + +func TestIDSet_Size(t *testing.T) { + type testCase struct { + name string + s IDSet + want int + } + tests := []testCase{ + { + name: "empty set", + s: NewIDSet(), + want: 0, + }, + { + name: "non-empty set", + s: NewIDSet(1, 2, 3), + want: 3, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := tt.s.Size(); got != tt.want { + t.Errorf("Size() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestIDSet_Add(t *testing.T) { + type args struct { + ids []ID + } + type testCase struct { + name string + s IDSet + args args + } + tests := []testCase{ + { + name: "add multiple", + s: NewIDSet(), + args: args{ids: []ID{1, 2, 3}}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + 
tt.s.Add(tt.args.ids...) + for _, id := range tt.args.ids { + if !tt.s.Contains(id) { + t.Errorf("expected set to contain %q", id) + } + } + }) + } +} + +func TestIDSet_Remove(t *testing.T) { + type args struct { + ids []ID + } + type testCase struct { + name string + s IDSet + args args + expected []ID + } + tests := []testCase{ + { + name: "remove multiple", + s: NewIDSet(1, 2, 3), + args: args{ids: []ID{1, 2}}, + expected: []ID{3}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tt.s.Remove(tt.args.ids...) + for _, id := range tt.args.ids { + if tt.s.Contains(id) { + t.Errorf("expected set to NOT contain %q", id) + } + } + for _, id := range tt.expected { + if !tt.s.Contains(id) { + t.Errorf("expected set to contain %q", id) + } + } + }) + } +} + +func TestIDSet_Contains(t *testing.T) { + type args struct { + i ID + } + type testCase struct { + name string + s IDSet + args args + want bool + } + tests := []testCase{ + { + name: "contains", + s: NewIDSet(1, 2, 3), + args: args{i: 1}, + want: true, + }, + { + name: "not contains", + s: NewIDSet(1, 2, 3), + args: args{i: 97}, + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := tt.s.Contains(tt.args.i); got != tt.want { + t.Errorf("Contains() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestIDSet_Clear(t *testing.T) { + type testCase struct { + name string + s IDSet + } + tests := []testCase{ + { + name: "go case", + s: NewIDSet(1, 2, 3), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tt.s.Clear() + assert.Equal(t, 0, tt.s.Size()) + }) + } +} + +func TestIDSet_List(t *testing.T) { + type testCase struct { + name string + s IDSet + want []ID + } + tests := []testCase{ + { + name: "go case", + s: NewIDSet(1, 2, 3), + want: []ID{1, 2, 3}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.ElementsMatchf(t, tt.want, tt.s.List(), "List()") + }) + } +} + +func 
TestIDSet_Sorted(t *testing.T) { + type testCase struct { + name string + s IDSet + want []ID + } + tests := []testCase{ + { + name: "go case", + s: NewIDSet(1, 2, 3), + want: []ID{1, 2, 3}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equalf(t, tt.want, tt.s.Sorted(), "Sorted()") + }) + } +} + +func TestIDSet_ContainsAny(t *testing.T) { + type args struct { + ids []ID + } + type testCase struct { + name string + s IDSet + args args + want bool + } + tests := []testCase{ + { + name: "contains one", + s: NewIDSet(1, 2, 3), + args: args{ids: []ID{1, 97}}, + want: true, + }, + { + name: "contains all", + s: NewIDSet(1, 2, 3), + args: args{ids: []ID{1, 2}}, + want: true, + }, + { + name: "contains none", + s: NewIDSet(1, 2, 3), + args: args{ids: []ID{97, 98}}, + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, tt.s.ContainsAny(tt.args.ids...), fmt.Sprintf("ContainsAny(%v)", tt.args.ids)) + }) + } +} diff --git a/pkg/file/path_set.go b/pkg/file/path_set.go index b42cb535..a46f342a 100644 --- a/pkg/file/path_set.go +++ b/pkg/file/path_set.go @@ -1,9 +1,77 @@ +//nolint:dupl package file -import "github.com/anchore/stereoscope/internal" +import ( + "sort" +) -type PathSet = internal.OrderableSet[Path] +type PathSet map[Path]struct{} -func NewPathSet(paths ...Path) PathSet { - return internal.NewOrderableSet(paths...) +func NewPathSet(is ...Path) PathSet { + // TODO: replace with single generic implementation that also incorporates other set implementations + s := make(PathSet) + s.Add(is...) 
+ return s +} + +func (s PathSet) Size() int { + return len(s) +} + +func (s PathSet) Merge(other PathSet) { + for _, i := range other.List() { + s.Add(i) + } +} + +func (s PathSet) Add(ids ...Path) { + for _, i := range ids { + s[i] = struct{}{} + } +} + +func (s PathSet) Remove(ids ...Path) { + for _, i := range ids { + delete(s, i) + } +} + +func (s PathSet) Contains(i Path) bool { + _, ok := s[i] + return ok +} + +func (s PathSet) Clear() { + // TODO: replace this with the new 'clear' keyword when it's available in go 1.20 or 1.21 + for i := range s { + delete(s, i) + } +} + +func (s PathSet) List() []Path { + ret := make([]Path, 0, len(s)) + for i := range s { + ret = append(ret, i) + } + return ret +} + +func (s PathSet) Sorted() []Path { + ids := s.List() + + sort.Slice(ids, func(i, j int) bool { + return ids[i] < ids[j] + }) + + return ids +} + +func (s PathSet) ContainsAny(ids ...Path) bool { + for _, i := range ids { + _, ok := s[i] + if ok { + return true + } + } + return false } diff --git a/pkg/file/path_set_test.go b/pkg/file/path_set_test.go new file mode 100644 index 00000000..5d296649 --- /dev/null +++ b/pkg/file/path_set_test.go @@ -0,0 +1,226 @@ +package file + +import ( + "fmt" + "github.com/stretchr/testify/assert" + "testing" +) + +func TestPathSet_Size(t *testing.T) { + type testCase struct { + name string + s PathSet + want int + } + tests := []testCase{ + { + name: "empty set", + s: NewPathSet(), + want: 0, + }, + { + name: "non-empty set", + s: NewPathSet("items", "in", "set"), + want: 3, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := tt.s.Size(); got != tt.want { + t.Errorf("Size() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestPathSet_Add(t *testing.T) { + type args struct { + ids []Path + } + type testCase struct { + name string + s PathSet + args args + } + tests := []testCase{ + { + name: "add multiple", + s: NewPathSet(), + args: args{ids: []Path{"a", "b", "c"}}, + }, + } + for _, 
tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tt.s.Add(tt.args.ids...) + for _, id := range tt.args.ids { + if !tt.s.Contains(id) { + t.Errorf("expected set to contain %q", id) + } + } + }) + } +} + +func TestPathSet_Remove(t *testing.T) { + type args struct { + ids []Path + } + type testCase struct { + name string + s PathSet + args args + expected []Path + } + tests := []testCase{ + { + name: "remove multiple", + s: NewPathSet("a", "b", "c"), + args: args{ids: []Path{"a", "b"}}, + expected: []Path{"c"}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tt.s.Remove(tt.args.ids...) + for _, id := range tt.args.ids { + if tt.s.Contains(id) { + t.Errorf("expected set to NOT contain %q", id) + } + } + for _, id := range tt.expected { + if !tt.s.Contains(id) { + t.Errorf("expected set to contain %q", id) + } + } + }) + } +} + +func TestPathSet_Contains(t *testing.T) { + type args struct { + i Path + } + type testCase struct { + name string + s PathSet + args args + want bool + } + tests := []testCase{ + { + name: "contains", + s: NewPathSet("a", "b", "c"), + args: args{i: "a"}, + want: true, + }, + { + name: "not contains", + s: NewPathSet("a", "b", "c"), + args: args{i: "x"}, + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := tt.s.Contains(tt.args.i); got != tt.want { + t.Errorf("Contains() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestPathSet_Clear(t *testing.T) { + type testCase struct { + name string + s PathSet + } + tests := []testCase{ + { + name: "go case", + s: NewPathSet("a", "b", "c"), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tt.s.Clear() + assert.Equal(t, 0, tt.s.Size()) + }) + } +} + +func TestPathSet_List(t *testing.T) { + type testCase struct { + name string + s PathSet + want []Path + } + tests := []testCase{ + { + name: "go case", + s: NewPathSet("a", "b", "c"), + want: []Path{"a", "b", "c"}, + }, + } + for _, 
tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.ElementsMatchf(t, tt.want, tt.s.List(), "List()") + }) + } +} + +func TestPathSet_Sorted(t *testing.T) { + type testCase struct { + name string + s PathSet + want []Path + } + tests := []testCase{ + { + name: "go case", + s: NewPathSet("a", "b", "c"), + want: []Path{"a", "b", "c"}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equalf(t, tt.want, tt.s.Sorted(), "Sorted()") + }) + } +} + +func TestPathSet_ContainsAny(t *testing.T) { + type args struct { + ids []Path + } + type testCase struct { + name string + s PathSet + args args + want bool + } + tests := []testCase{ + { + name: "contains one", + s: NewPathSet("a", "b", "c"), + args: args{ids: []Path{"a", "x"}}, + want: true, + }, + { + name: "contains all", + s: NewPathSet("a", "b", "c"), + args: args{ids: []Path{"a", "b"}}, + want: true, + }, + { + name: "contains none", + s: NewPathSet("a", "b", "c"), + args: args{ids: []Path{"x", "y"}}, + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, tt.s.ContainsAny(tt.args.ids...), fmt.Sprintf("ContainsAny(%v)", tt.args.ids)) + }) + } +} diff --git a/pkg/file/type.go b/pkg/file/type.go index 817a22c0..0d587a1b 100644 --- a/pkg/file/type.go +++ b/pkg/file/type.go @@ -37,7 +37,7 @@ func AllTypes() []Type { func TypeFromTarType(ty byte) Type { switch ty { - case tar.TypeReg, tar.TypeRegA: //nolint: staticcheck + case tar.TypeReg, tar.TypeRegA: return TypeReg case tar.TypeLink: return TypeHardLink diff --git a/pkg/filetree/filetree.go b/pkg/filetree/filetree.go index da2ced3a..93d4e13d 100644 --- a/pkg/filetree/filetree.go +++ b/pkg/filetree/filetree.go @@ -378,7 +378,7 @@ func (t *FileTree) resolveAncestorLinks(path file.Path, attemptedPaths file.Path // resolveNodeLinks takes the given FileNode and resolves all links at the base of the real path for the node (this implies // that NO ancestors are 
considered). -//nolint: funlen +// nolint: funlen func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, attemptedPaths file.PathSet) (*nodeAccess, error) { if n == nil { return nil, fmt.Errorf("cannot resolve links with nil Node given") diff --git a/pkg/filetree/search.go b/pkg/filetree/search.go index f3521299..479aeaa7 100644 --- a/pkg/filetree/search.go +++ b/pkg/filetree/search.go @@ -329,7 +329,7 @@ func (sc searchContext) pathsToNode(fn *filenode.FileNode, observedPaths file.Pa return paths, err } -//nolint: funlen +// nolint: funlen func (sc searchContext) _pathsToNode(fn *filenode.FileNode, observedPaths file.PathSet, cache map[cacheRequest]cacheResult) (file.PathSet, error) { if fn == nil { return nil, nil diff --git a/pkg/tree/node/id.go b/pkg/tree/node/id.go index 7a47790e..b023c733 100644 --- a/pkg/tree/node/id.go +++ b/pkg/tree/node/id.go @@ -1,11 +1,76 @@ package node -import "github.com/anchore/stereoscope/internal" +import "sort" type ID string -type IDSet = internal.OrderableSet[ID] +type IDSet map[ID]struct{} -func NewIDSet(ids ...ID) IDSet { - return internal.NewOrderableSet(ids...) +func NewIDSet(is ...ID) IDSet { + // TODO: replace with single generic implementation that also incorporates other set implementations + s := make(IDSet) + s.Add(is...) 
+ return s +} + +func (s IDSet) Size() int { + return len(s) +} + +func (s IDSet) Merge(other IDSet) { + for _, i := range other.List() { + s.Add(i) + } +} + +func (s IDSet) Add(ids ...ID) { + for _, i := range ids { + s[i] = struct{}{} + } +} + +func (s IDSet) Remove(ids ...ID) { + for _, i := range ids { + delete(s, i) + } +} + +func (s IDSet) Contains(i ID) bool { + _, ok := s[i] + return ok +} + +func (s IDSet) Clear() { + // TODO: replace this with the new 'clear' keyword when it's available in go 1.20 or 1.21 + for i := range s { + delete(s, i) + } +} + +func (s IDSet) List() []ID { + ret := make([]ID, 0, len(s)) + for i := range s { + ret = append(ret, i) + } + return ret +} + +func (s IDSet) Sorted() []ID { + ids := s.List() + + sort.Slice(ids, func(i, j int) bool { + return ids[i] < ids[j] + }) + + return ids +} + +func (s IDSet) ContainsAny(ids ...ID) bool { + for _, i := range ids { + _, ok := s[i] + if ok { + return true + } + } + return false } diff --git a/pkg/tree/node/id_test.go b/pkg/tree/node/id_test.go new file mode 100644 index 00000000..222981bc --- /dev/null +++ b/pkg/tree/node/id_test.go @@ -0,0 +1,226 @@ +package node + +import ( + "fmt" + "github.com/stretchr/testify/assert" + "testing" +) + +func TestIDSet_Size(t *testing.T) { + type testCase struct { + name string + s IDSet + want int + } + tests := []testCase{ + { + name: "empty set", + s: NewIDSet(), + want: 0, + }, + { + name: "non-empty set", + s: NewIDSet("items", "in", "set"), + want: 3, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := tt.s.Size(); got != tt.want { + t.Errorf("Size() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestIDSet_Add(t *testing.T) { + type args struct { + ids []ID + } + type testCase struct { + name string + s IDSet + args args + } + tests := []testCase{ + { + name: "add multiple", + s: NewIDSet(), + args: args{ids: []ID{"a", "b", "c"}}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) 
{ + tt.s.Add(tt.args.ids...) + for _, id := range tt.args.ids { + if !tt.s.Contains(id) { + t.Errorf("expected set to contain %q", id) + } + } + }) + } +} + +func TestIDSet_Remove(t *testing.T) { + type args struct { + ids []ID + } + type testCase struct { + name string + s IDSet + args args + expected []ID + } + tests := []testCase{ + { + name: "remove multiple", + s: NewIDSet("a", "b", "c"), + args: args{ids: []ID{"a", "b"}}, + expected: []ID{"c"}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tt.s.Remove(tt.args.ids...) + for _, id := range tt.args.ids { + if tt.s.Contains(id) { + t.Errorf("expected set to NOT contain %q", id) + } + } + for _, id := range tt.expected { + if !tt.s.Contains(id) { + t.Errorf("expected set to contain %q", id) + } + } + }) + } +} + +func TestIDSet_Contains(t *testing.T) { + type args struct { + i ID + } + type testCase struct { + name string + s IDSet + args args + want bool + } + tests := []testCase{ + { + name: "contains", + s: NewIDSet("a", "b", "c"), + args: args{i: "a"}, + want: true, + }, + { + name: "not contains", + s: NewIDSet("a", "b", "c"), + args: args{i: "x"}, + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := tt.s.Contains(tt.args.i); got != tt.want { + t.Errorf("Contains() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestIDSet_Clear(t *testing.T) { + type testCase struct { + name string + s IDSet + } + tests := []testCase{ + { + name: "go case", + s: NewIDSet("a", "b", "c"), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tt.s.Clear() + assert.Equal(t, 0, tt.s.Size()) + }) + } +} + +func TestIDSet_List(t *testing.T) { + type testCase struct { + name string + s IDSet + want []ID + } + tests := []testCase{ + { + name: "go case", + s: NewIDSet("a", "b", "c"), + want: []ID{"a", "b", "c"}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.ElementsMatchf(t, tt.want, 
tt.s.List(), "List()") + }) + } +} + +func TestIDSet_Sorted(t *testing.T) { + type testCase struct { + name string + s IDSet + want []ID + } + tests := []testCase{ + { + name: "go case", + s: NewIDSet("a", "b", "c"), + want: []ID{"a", "b", "c"}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equalf(t, tt.want, tt.s.Sorted(), "Sorted()") + }) + } +} + +func TestIDSet_ContainsAny(t *testing.T) { + type args struct { + ids []ID + } + type testCase struct { + name string + s IDSet + args args + want bool + } + tests := []testCase{ + { + name: "contains one", + s: NewIDSet("a", "b", "c"), + args: args{ids: []ID{"a", "x"}}, + want: true, + }, + { + name: "contains all", + s: NewIDSet("a", "b", "c"), + args: args{ids: []ID{"a", "b"}}, + want: true, + }, + { + name: "contains none", + s: NewIDSet("a", "b", "c"), + args: args{ids: []ID{"x", "y"}}, + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, tt.s.ContainsAny(tt.args.ids...), fmt.Sprintf("ContainsAny(%v)", tt.args.ids)) + }) + } +} From 53005d81a6cade272c89994fa24fe0c33e8e8aa2 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Fri, 3 Feb 2023 17:36:11 -0500 Subject: [PATCH 21/35] introduce filtree builter and foster usage of reader interfaces Signed-off-by: Alex Goodman --- pkg/filetree/builder.go | 53 ++++++++++ pkg/filetree/depth_first_path_walker_test.go | 4 +- pkg/filetree/filetree.go | 98 ++++------------- pkg/filetree/filetree_test.go | 82 +++++++------- pkg/filetree/glob_test.go | 8 +- pkg/filetree/index_test.go | 2 +- pkg/filetree/interfaces.go | 45 ++++++++ pkg/filetree/node_access.go | 50 +++++++++ pkg/filetree/search.go | 8 +- pkg/filetree/search_test.go | 28 ++--- pkg/filetree/union_filetree.go | 14 +-- pkg/filetree/union_filetree_test.go | 8 +- pkg/image/content_helpers.go | 6 +- pkg/image/docker/tarball_provider.go | 2 +- pkg/image/file_catalog.go | 19 +++- pkg/image/file_catalog_test.go | 14 +-- 
pkg/image/image.go | 37 +++++-- pkg/image/image_test.go | 2 +- pkg/image/layer.go | 100 +++++++----------- pkg/image/oci/directory_provider.go | 2 +- pkg/image/oci/registry_provider.go | 2 +- pkg/image/sif/provider.go | 2 +- test/integration/fixture_image_simple_test.go | 20 ++-- .../fixture_image_symlinks_test.go | 2 +- test/integration/utils_test.go | 24 ++--- 25 files changed, 364 insertions(+), 268 deletions(-) create mode 100644 pkg/filetree/builder.go create mode 100644 pkg/filetree/interfaces.go create mode 100644 pkg/filetree/node_access.go diff --git a/pkg/filetree/builder.go b/pkg/filetree/builder.go new file mode 100644 index 00000000..36d94be6 --- /dev/null +++ b/pkg/filetree/builder.go @@ -0,0 +1,53 @@ +package filetree + +import ( + "fmt" + "github.com/anchore/stereoscope/pkg/file" +) + +// Builder is a helper for building a filetree and accompanying index in a coordinated fashion. +type Builder struct { + tree Writer + index IndexWriter +} + +func NewBuilder(tree Writer, index IndexWriter) *Builder { + return &Builder{ + tree: tree, + index: index, + } +} + +func (b *Builder) Add(metadata file.Metadata) (*file.Reference, error) { + var ref *file.Reference + var err error + switch metadata.Type { + case file.TypeSymlink: + ref, err = b.tree.AddSymLink(file.Path(metadata.Path), file.Path(metadata.LinkDestination)) + if err != nil { + return nil, err + } + case file.TypeHardLink: + ref, err = b.tree.AddHardLink(file.Path(metadata.Path), file.Path(metadata.LinkDestination)) + if err != nil { + return nil, err + } + case file.TypeDir: + ref, err = b.tree.AddDir(file.Path(metadata.Path)) + if err != nil { + return nil, err + } + default: + ref, err = b.tree.AddFile(file.Path(metadata.Path)) + if err != nil { + return nil, err + } + } + if ref == nil { + return nil, fmt.Errorf("could not add path=%q link=%q during tar iteration", metadata.Path, metadata.LinkDestination) + } + + b.index.Add(*ref, metadata) + + return ref, nil +} diff --git 
a/pkg/filetree/depth_first_path_walker_test.go b/pkg/filetree/depth_first_path_walker_test.go index ee65a695..74678709 100644 --- a/pkg/filetree/depth_first_path_walker_test.go +++ b/pkg/filetree/depth_first_path_walker_test.go @@ -10,7 +10,7 @@ import ( ) func dfsTestTree(t *testing.T) (*FileTree, map[string]*file.Reference) { - tr := NewFileTree() + tr := New() possiblePaths := make(map[string]*file.Reference) @@ -233,7 +233,7 @@ func TestDFS_WalkAll_ConditionalBranchPruning(t *testing.T) { } func TestDFS_WalkAll_MaxDirDepthTerminatesTraversal(t *testing.T) { - tr := NewFileTree() + tr := New() possiblePaths := make(map[string]*file.Reference) diff --git a/pkg/filetree/filetree.go b/pkg/filetree/filetree.go index 93d4e13d..3045f2b2 100644 --- a/pkg/filetree/filetree.go +++ b/pkg/filetree/filetree.go @@ -19,75 +19,19 @@ import ( var ErrRemovingRoot = errors.New("cannot remove the root path (`/`) from the FileTree") var ErrLinkCycleDetected = errors.New("cycle during symlink resolution") -type Reader interface { - File(path file.Path, options ...LinkResolutionOption) (bool, *file.ReferenceAccessVia, error) - FilesByGlob(query string, options ...LinkResolutionOption) ([]file.ReferenceAccessVia, error) - // note: there are more reader-like functions, however, let's try to keep this interface small and simple for now -} - -type Walker interface { - Walk(fn func(path file.Path, f filenode.FileNode) error, conditions *WalkConditions) error -} - -type Writer interface { - AddFile(realPath file.Path) (*file.Reference, error) - AddSymLink(realPath file.Path, linkPath file.Path) (*file.Reference, error) - AddHardLink(realPath file.Path, linkPath file.Path) (*file.Reference, error) - AddDir(realPath file.Path) (*file.Reference, error) - RemovePath(path file.Path) error -} - -// nodeAccess represents a request into the tree for a specific path and the resulting node, which may have a different path. 
-type nodeAccess struct { - RequestPath file.Path - FileNode *filenode.FileNode // note: it is important that nodeAccess does not behave like FileNode (then it can be added to the tree directly) - LeafLinkResolution []nodeAccess -} - -func (na *nodeAccess) HasFileNode() bool { - if na == nil { - return false - } - return na.FileNode != nil -} - -func (na *nodeAccess) FileReferenceVia() *file.ReferenceAccessVia { - if !na.HasFileNode() { - return nil - } - return file.NewFileReferenceVia( - na.RequestPath, - na.FileNode.Reference, - newReferenceAccessPath(na.LeafLinkResolution), - ) -} - -func (na *nodeAccess) References() []file.Reference { - if !na.HasFileNode() { - return nil - } - var refs []file.Reference - - if na.FileNode.Reference != nil { - refs = append(refs, *na.FileNode.Reference) - } - - for _, l := range na.LeafLinkResolution { - if l.HasFileNode() && l.FileNode.Reference != nil { - refs = append(refs, *l.FileNode.Reference) - } - } - - return refs -} - // FileTree represents a file/directory Tree type FileTree struct { tree *tree.Tree } // NewFileTree creates a new FileTree instance. +// Deprecated: use New() instead. func NewFileTree() *FileTree { + return New() +} + +// New creates a new FileTree instance. +func New() *FileTree { t := tree.NewTree() // Initialize FileTree with a root "/" Node @@ -99,8 +43,8 @@ func NewFileTree() *FileTree { } // Copy returns a Copy of the current FileTree. -func (t *FileTree) Copy() (*FileTree, error) { - ct := NewFileTree() +func (t *FileTree) Copy() (ReadWriter, error) { + ct := New() ct.tree = t.tree.Copy() return ct, nil } @@ -760,8 +704,8 @@ func (t *FileTree) RemoveChildPaths(path file.Path) error { return nil } -// Reader returns a tree.Reader useful for Tree traversal. -func (t *FileTree) Reader() tree.Reader { +// TreeReader returns a tree.Reader useful for Tree traversal. 
+func (t *FileTree) TreeReader() tree.Reader { return t.tree } @@ -817,12 +761,12 @@ func (t *FileTree) Walk(fn func(path file.Path, f filenode.FileNode) error, cond return NewDepthFirstPathWalker(t, fn, conditions).WalkAll() } -// merge takes the given Tree and combines it with the current Tree, preferring files in the other Tree if there +// Merge takes the given Tree and combines it with the current Tree, preferring files in the other Tree if there // are path conflicts. This is the basis function for squashing (where the current Tree is the bottom Tree and the // given Tree is the top Tree). // //nolint:gocognit,funlen -func (t *FileTree) merge(upper *FileTree) error { +func (t *FileTree) Merge(upper Reader) error { conditions := tree.WalkConditions{ ShouldContinueBranch: func(n node.Node) bool { p := file.Path(n.ID()) @@ -840,22 +784,22 @@ func (t *FileTree) merge(upper *FileTree) error { } upperNode := n.(*filenode.FileNode) // opaque directories must be processed first - if upper.hasOpaqueDirectory(upperNode.RealPath) { + if hasOpaqueDirectory(upper, upperNode.RealPath) { err := t.RemoveChildPaths(upperNode.RealPath) if err != nil { - return fmt.Errorf("filetree merge failed to remove child paths (upperPath=%s): %w", upperNode.RealPath, err) + return fmt.Errorf("filetree Merge failed to remove child paths (upperPath=%s): %w", upperNode.RealPath, err) } } if upperNode.RealPath.IsWhiteout() { lowerPath, err := upperNode.RealPath.UnWhiteoutPath() if err != nil { - return fmt.Errorf("filetree merge failed to find original upperPath for whiteout (upperPath=%s): %w", upperNode.RealPath, err) + return fmt.Errorf("filetree Merge failed to find original upperPath for whiteout (upperPath=%s): %w", upperNode.RealPath, err) } err = t.RemovePath(lowerPath) if err != nil { - return fmt.Errorf("filetree merge failed to remove upperPath (upperPath=%s): %w", lowerPath, err) + return fmt.Errorf("filetree Merge failed to remove upperPath (upperPath=%s): %w", lowerPath, err) } 
return nil @@ -866,7 +810,7 @@ func (t *FileTree) merge(upper *FileTree) error { FollowBasenameLinks: false, }) if err != nil { - return fmt.Errorf("filetree merge failed when looking for path=%q : %w", upperNode.RealPath, err) + return fmt.Errorf("filetree Merge failed when looking for path=%q : %w", upperNode.RealPath, err) } if !lowerNode.HasFileNode() { // there is no existing Node... add parents and prepare to set @@ -887,12 +831,12 @@ func (t *FileTree) merge(upper *FileTree) error { // on removal of child paths err := t.RemoveChildPaths(upperNode.RealPath) if err != nil { - return fmt.Errorf("filetree merge failed to remove children for non-directory upper node (%s): %w", upperNode.RealPath, err) + return fmt.Errorf("filetree Merge failed to remove children for non-directory upper node (%s): %w", upperNode.RealPath, err) } } // graft a copy of the upper Node with potential lower information into the lower tree if err := t.setFileNode(&nodeCopy); err != nil { - return fmt.Errorf("filetree merge failed to set file Node (Node=%+v): %w", nodeCopy, err) + return fmt.Errorf("filetree Merge failed to set file Node (Node=%+v): %w", nodeCopy, err) } return nil @@ -901,10 +845,10 @@ func (t *FileTree) merge(upper *FileTree) error { // we are using the tree walker instead of the path walker to only look at an resolve merging of real files // with no consideration to virtual paths (paths that are valid in the filetree because constituent paths // contain symlinks). 
- return tree.NewDepthFirstWalkerWithConditions(upper.Reader(), visitor, conditions).WalkAll() + return tree.NewDepthFirstWalkerWithConditions(upper.TreeReader(), visitor, conditions).WalkAll() } -func (t *FileTree) hasOpaqueDirectory(directoryPath file.Path) bool { +func hasOpaqueDirectory(t Reader, directoryPath file.Path) bool { opaqueWhiteoutChild := file.Path(path.Join(string(directoryPath), file.OpaqueWhiteout)) return t.HasPath(opaqueWhiteoutChild) } diff --git a/pkg/filetree/filetree_test.go b/pkg/filetree/filetree_test.go index 987958d3..4dc9d23e 100644 --- a/pkg/filetree/filetree_test.go +++ b/pkg/filetree/filetree_test.go @@ -14,7 +14,7 @@ import ( ) func TestFileTree_AddPath(t *testing.T) { - tr := NewFileTree() + tr := New() path := file.Path("/home") fileNode, err := tr.AddFile(path) if err != nil { @@ -28,7 +28,7 @@ func TestFileTree_AddPath(t *testing.T) { } func TestFileTree_AddPathAndMissingAncestors(t *testing.T) { - tr := NewFileTree() + tr := New() path := file.Path("/home/wagoodman/awesome/file.txt") fileNode, err := tr.AddFile(path) if err != nil { @@ -59,7 +59,7 @@ func TestFileTree_AddPathAndMissingAncestors(t *testing.T) { } func TestFileTree_RemovePath(t *testing.T) { - tr := NewFileTree() + tr := New() path := file.Path("/home/wagoodman/awesome/file.txt") _, err := tr.AddFile(path) if err != nil { @@ -88,7 +88,7 @@ func TestFileTree_RemovePath(t *testing.T) { func TestFileTree_FilesByGlob_AncestorSymlink(t *testing.T) { var err error - tr := NewFileTree() + tr := New() _, err = tr.AddSymLink("/parent-link", "/parent") require.NoError(t, err) @@ -121,7 +121,7 @@ func TestFileTree_FilesByGlob_AncestorSymlink(t *testing.T) { } func TestFileTree_FilesByGlob(t *testing.T) { - tr := NewFileTree() + tr := New() paths := []string{ "/home/wagoodman/awesome/file.txt", @@ -362,14 +362,14 @@ func TestFileTree_FilesByGlob(t *testing.T) { } func TestFileTree_Merge(t *testing.T) { - tr1 := NewFileTree() + tr1 := New() 
tr1.AddFile("/home/wagoodman/awesome/file-1.txt") - tr2 := NewFileTree() + tr2 := New() tr2.AddFile("/home/wagoodman/awesome/file-2.txt") - if err := tr1.merge(tr2); err != nil { - t.Fatalf("error on merge : %+v", err) + if err := tr1.Merge(tr2); err != nil { + t.Fatalf("error on Merge : %+v", err) } for _, p := range []file.Path{"/home/wagoodman/awesome/file-1.txt", "/home/wagoodman/awesome/file-2.txt"} { @@ -380,34 +380,34 @@ func TestFileTree_Merge(t *testing.T) { } func TestFileTree_Merge_Overwrite(t *testing.T) { - tr1 := NewFileTree() + tr1 := New() tr1.AddFile("/home/wagoodman/awesome/file.txt") - tr2 := NewFileTree() + tr2 := New() newRef, _ := tr2.AddFile("/home/wagoodman/awesome/file.txt") - if err := tr1.merge(tr2); err != nil { - t.Fatalf("error on merge : %+v", err) + if err := tr1.Merge(tr2); err != nil { + t.Fatalf("error on Merge : %+v", err) } _, f, _ := tr1.File("/home/wagoodman/awesome/file.txt") if f.ID() != newRef.ID() { - t.Fatalf("did not overwrite paths on merge") + t.Fatalf("did not overwrite paths on Merge") } } func TestFileTree_Merge_OpaqueWhiteout(t *testing.T) { - tr1 := NewFileTree() + tr1 := New() _, err := tr1.AddFile("/home/wagoodman/awesome/file.txt") require.NoError(t, err) - tr2 := NewFileTree() + tr2 := New() _, err = tr2.AddFile("/home/wagoodman/.wh..wh..opq") require.NoError(t, err) - if err := tr1.merge(tr2); err != nil { - t.Fatalf("error on merge : %+v", err) + if err := tr1.Merge(tr2); err != nil { + t.Fatalf("error on Merge : %+v", err) } for _, p := range []file.Path{"/home/wagoodman", "/home"} { @@ -425,14 +425,14 @@ func TestFileTree_Merge_OpaqueWhiteout(t *testing.T) { } func TestFileTree_Merge_OpaqueWhiteout_NoLowerDirectory(t *testing.T) { - tr1 := NewFileTree() + tr1 := New() tr1.AddFile("/home") - tr2 := NewFileTree() + tr2 := New() tr2.AddFile("/home/luhring/.wh..wh..opq") - if err := tr1.merge(tr2); err != nil { - t.Fatalf("error on merge : %+v", err) + if err := tr1.Merge(tr2); err != nil { + t.Fatalf("error 
on Merge : %+v", err) } for _, p := range []file.Path{"/home/luhring", "/home"} { @@ -443,14 +443,14 @@ func TestFileTree_Merge_OpaqueWhiteout_NoLowerDirectory(t *testing.T) { } func TestFileTree_Merge_Whiteout(t *testing.T) { - tr1 := NewFileTree() + tr1 := New() tr1.AddFile("/home/wagoodman/awesome/file.txt") - tr2 := NewFileTree() + tr2 := New() tr2.AddFile("/home/wagoodman/awesome/.wh.file.txt") - if err := tr1.merge(tr2); err != nil { - t.Fatalf("error on merge : %+v", err) + if err := tr1.Merge(tr2); err != nil { + t.Fatalf("error on Merge : %+v", err) } for _, p := range []file.Path{"/home/wagoodman/awesome", "/home/wagoodman", "/home"} { @@ -468,14 +468,14 @@ func TestFileTree_Merge_Whiteout(t *testing.T) { } func TestFileTree_Merge_DirOverride(t *testing.T) { - tr1 := NewFileTree() + tr1 := New() tr1.AddFile("/home/wagoodman/awesome/place") - tr2 := NewFileTree() + tr2 := New() tr2.AddFile("/home/wagoodman/awesome/place/thing.txt") - if err := tr1.merge(tr2); err != nil { - t.Fatalf("error on merge : %+v", err) + if err := tr1.Merge(tr2); err != nil { + t.Fatalf("error on Merge : %+v", err) } for _, p := range []file.Path{"/home/wagoodman/awesome/place", "/home/wagoodman/awesome/place/thing.txt"} { @@ -499,17 +499,17 @@ func TestFileTree_Merge_DirOverride(t *testing.T) { } func TestFileTree_Merge_RemoveChildPathsOnOverride(t *testing.T) { - lowerTree := NewFileTree() + lowerTree := New() // add a file in the lower tree, which implicitly adds "/home/wagoodman/awesome/place" as a directory type lowerTree.AddFile("/home/wagoodman/awesome/place/thing.txt") - upperTree := NewFileTree() + upperTree := New() // add "/home/wagoodman/awesome/place" as a file type in the upper treee upperTree.AddFile("/home/wagoodman/awesome/place") // merge the upper tree into the lower tree - if err := lowerTree.merge(upperTree); err != nil { - t.Fatalf("error on merge : %+v", err) + if err := lowerTree.Merge(upperTree); err != nil { + t.Fatalf("error on Merge : %+v", err) } // 
the directory should still exist @@ -539,7 +539,7 @@ func TestFileTree_Merge_RemoveChildPathsOnOverride(t *testing.T) { func TestFileTree_File_MultiSymlink(t *testing.T) { var err error - tr := NewFileTree() + tr := New() _, err = tr.AddSymLink("/home", "/link-to-1/link-to-place") require.NoError(t, err) @@ -622,7 +622,7 @@ func TestFileTree_File_MultiSymlink(t *testing.T) { func TestFileTree_File_MultiSymlink_deadlink(t *testing.T) { var err error - tr := NewFileTree() + tr := New() _, err = tr.AddSymLink("/home", "/link-to-1/link-to-place") require.NoError(t, err) @@ -988,7 +988,7 @@ func TestFileTree_File_Symlink(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - tr := NewFileTree() + tr := New() _, err := tr.AddSymLink(test.buildLinkSource, test.buildLinkDest) if err != nil { t.Fatalf("unexpected an error on add link: %+v", err) @@ -1035,7 +1035,7 @@ func TestFileTree_File_Symlink(t *testing.T) { } func TestFileTree_File_MultipleIndirections(t *testing.T) { - tr := NewFileTree() + tr := New() // first indirection _, err := tr.AddSymLink("/home", "/another/place") if err != nil { @@ -1076,7 +1076,7 @@ func TestFileTree_File_MultipleIndirections(t *testing.T) { } func TestFileTree_File_CycleDetection(t *testing.T) { - tr := NewFileTree() + tr := New() // first indirection _, err := tr.AddSymLink("/home", "/another/place") if err != nil { @@ -1102,7 +1102,7 @@ func TestFileTree_File_CycleDetection(t *testing.T) { } func TestFileTree_File_DeadCycleDetection(t *testing.T) { - tr := NewFileTree() + tr := New() _, err := tr.AddSymLink("/somewhere/acorn", "noobaa-core/../acorn/bin/acorn") require.NoError(t, err) @@ -1119,7 +1119,7 @@ func TestFileTree_File_DeadCycleDetection(t *testing.T) { } func TestFileTree_AllFiles(t *testing.T) { - tr := NewFileTree() + tr := New() paths := []string{ "/home/a-file.txt", diff --git a/pkg/filetree/glob_test.go b/pkg/filetree/glob_test.go index 3f47837d..e48c7ef6 100644 --- 
a/pkg/filetree/glob_test.go +++ b/pkg/filetree/glob_test.go @@ -10,7 +10,7 @@ import ( ) func TestFileInfoAdapter(t *testing.T) { - tr := NewFileTree() + tr := New() tr.AddFile("/home/thing.txt") tr.AddDir("/home/wagoodman") tr.AddSymLink("/home/thing", "./thing.txt") @@ -118,7 +118,7 @@ func TestFileInfoAdapter(t *testing.T) { } func TestOsAdapter_PreventInfiniteLoop(t *testing.T) { - tr := NewFileTree() + tr := New() tr.AddFile("/usr/bin/busybox") tr.AddSymLink("/usr/bin/X11", ".") @@ -167,7 +167,7 @@ func TestOsAdapter_PreventInfiniteLoop(t *testing.T) { } func TestFileInfoAdapter_PreventInfiniteLoop(t *testing.T) { - tr := NewFileTree() + tr := New() tr.AddFile("/usr/bin/busybox") tr.AddSymLink("/usr/bin/X11", ".") @@ -470,7 +470,7 @@ func TestOSAdapter_Stat(t *testing.T) { } func newHelperTree() *FileTree { - tr := NewFileTree() + tr := New() tr.AddFile("/home/thing.txt") tr.AddDir("/home/wagoodman") tr.AddSymLink("/home/thing", "./thing.txt") diff --git a/pkg/filetree/index_test.go b/pkg/filetree/index_test.go index 436cb767..e436abb2 100644 --- a/pkg/filetree/index_test.go +++ b/pkg/filetree/index_test.go @@ -16,7 +16,7 @@ import ( func commonIndexFixture(t *testing.T) Index { t.Helper() - tree := NewFileTree() + tree := New() idx := NewIndex() addDir := func(path file.Path) { diff --git a/pkg/filetree/interfaces.go b/pkg/filetree/interfaces.go new file mode 100644 index 00000000..851c0853 --- /dev/null +++ b/pkg/filetree/interfaces.go @@ -0,0 +1,45 @@ +package filetree + +import ( + "github.com/anchore/stereoscope/pkg/file" + "github.com/anchore/stereoscope/pkg/filetree/filenode" + "github.com/anchore/stereoscope/pkg/tree" +) + +type ReadWriter interface { + Reader + Writer +} + +type Reader interface { + AllFiles(types ...file.Type) []file.Reference + TreeReader() tree.Reader + PathReader + Walker + Copier +} + +type PathReader interface { + File(path file.Path, options ...LinkResolutionOption) (bool, *file.ReferenceAccessVia, error) + FilesByGlob(query 
string, options ...LinkResolutionOption) ([]file.ReferenceAccessVia, error) + AllRealPaths() []file.Path + ListPaths(dir file.Path) ([]file.Path, error) + HasPath(path file.Path, options ...LinkResolutionOption) bool +} + +type Copier interface { + Copy() (ReadWriter, error) +} + +type Walker interface { + Walk(fn func(path file.Path, f filenode.FileNode) error, conditions *WalkConditions) error +} + +type Writer interface { + AddFile(realPath file.Path) (*file.Reference, error) + AddSymLink(realPath file.Path, linkPath file.Path) (*file.Reference, error) + AddHardLink(realPath file.Path, linkPath file.Path) (*file.Reference, error) + AddDir(realPath file.Path) (*file.Reference, error) + RemovePath(path file.Path) error + Merge(upper Reader) error +} diff --git a/pkg/filetree/node_access.go b/pkg/filetree/node_access.go new file mode 100644 index 00000000..ec7e752f --- /dev/null +++ b/pkg/filetree/node_access.go @@ -0,0 +1,50 @@ +package filetree + +import ( + "github.com/anchore/stereoscope/pkg/file" + "github.com/anchore/stereoscope/pkg/filetree/filenode" +) + +// nodeAccess represents a request into the tree for a specific path and the resulting node, which may have a different path. 
+type nodeAccess struct { + RequestPath file.Path + FileNode *filenode.FileNode // note: it is important that nodeAccess does not behave like FileNode (then it can be added to the tree directly) + LeafLinkResolution []nodeAccess +} + +func (na *nodeAccess) HasFileNode() bool { + if na == nil { + return false + } + return na.FileNode != nil +} + +func (na *nodeAccess) FileReferenceVia() *file.ReferenceAccessVia { + if !na.HasFileNode() { + return nil + } + return file.NewFileReferenceVia( + na.RequestPath, + na.FileNode.Reference, + newReferenceAccessPath(na.LeafLinkResolution), + ) +} + +func (na *nodeAccess) References() []file.Reference { + if !na.HasFileNode() { + return nil + } + var refs []file.Reference + + if na.FileNode.Reference != nil { + refs = append(refs, *na.FileNode.Reference) + } + + for _, l := range na.LeafLinkResolution { + if l.HasFileNode() && l.FileNode.Reference != nil { + refs = append(refs, *l.FileNode.Reference) + } + } + + return refs +} diff --git a/pkg/filetree/search.go b/pkg/filetree/search.go index 479aeaa7..3b48f15c 100644 --- a/pkg/filetree/search.go +++ b/pkg/filetree/search.go @@ -21,17 +21,17 @@ type Searcher interface { } type searchContext struct { - tree *FileTree // this is the tree which all index search results are filtered against - index Index // this index is relative to one or more trees, not just necessarily one + tree *FileTree // this is the tree which all index search results are filtered against + index IndexReader // this index is relative to one or more trees, not just necessarily one // the following enables correct link resolution when searching via the index linkForwardRef map[node.ID]node.ID // {link-node-id: link-destination-node-id} linkBackwardRefs map[node.ID]node.IDSet // {link-destination-node-id: str([link-node-id, ...])} } -func NewSearchContext(tree *FileTree, index Index) Searcher { +func NewSearchContext(tree Reader, index IndexReader) Searcher { c := &searchContext{ - tree: tree, + tree: 
tree.(*FileTree), index: index, linkForwardRef: make(map[node.ID]node.ID), linkBackwardRefs: make(map[node.ID]node.IDSet), diff --git a/pkg/filetree/search_test.go b/pkg/filetree/search_test.go index 82ed9dc7..30aadc3f 100644 --- a/pkg/filetree/search_test.go +++ b/pkg/filetree/search_test.go @@ -21,7 +21,7 @@ func Test_searchContext_SearchByPath(t *testing.T) { options []LinkResolutionOption } - tree := NewFileTree() + tree := New() ref, err := tree.AddFile("/path/to/file.txt") require.NoError(t, err) require.NotNil(t, ref) @@ -101,7 +101,7 @@ func Test_searchContext_SearchByGlob(t *testing.T) { options []LinkResolutionOption } - tree := NewFileTree() + tree := New() doubleLinkToPathRef, err := tree.AddSymLink("/double-link-to-path", "/link-to-path") require.NoError(t, err) require.NotNil(t, doubleLinkToPathRef) @@ -402,7 +402,7 @@ func Test_searchContext_SearchByMIMEType(t *testing.T) { mimeTypes string } - tree := NewFileTree() + tree := New() ref, err := tree.AddFile("/path/to/file.txt") require.NoError(t, err) require.NotNil(t, ref) @@ -492,7 +492,7 @@ func Test_searchContext_allPathsToNode(t *testing.T) { "/path/to", }, input: func() input { - tree := NewFileTree() + tree := New() fileRef, err := tree.AddFile("/path/to/file.txt") require.NoError(t, err) @@ -523,7 +523,7 @@ func Test_searchContext_allPathsToNode(t *testing.T) { "/path/to/file.txt", }, input: func() input { - tree := NewFileTree() + tree := New() deafLinkRef, err := tree.AddSymLink("/link-to-file", "/path/to/dead/file.txt") require.NoError(t, err) @@ -561,7 +561,7 @@ func Test_searchContext_allPathsToNode(t *testing.T) { "/3", }, input: func() input { - tree := NewFileTree() + tree := New() link1, err := tree.AddSymLink("/1", "/2") require.NoError(t, err) @@ -605,7 +605,7 @@ func Test_searchContext_allPathsToNode(t *testing.T) { "/usr/bin/ttyd", }, input: func() input { - tree := NewFileTree() + tree := New() usrRef, err := tree.AddDir("/usr") require.NoError(t, err) @@ -657,7 +657,7 @@ func 
Test_searchContext_allPathsToNode(t *testing.T) { "/path/to/file.txt", }, input: func() input { - tree := NewFileTree() + tree := New() linkToFileRef, err := tree.AddSymLink("/link-to-file", "/path/to/file.txt") require.NoError(t, err) @@ -695,7 +695,7 @@ func Test_searchContext_allPathsToNode(t *testing.T) { "/path/to/file.txt", }, input: func() input { - tree := NewFileTree() + tree := New() doubleLinkToFileRef, err := tree.AddSymLink("/double-link-to-file", "/link-to-file") require.NoError(t, err) @@ -737,7 +737,7 @@ func Test_searchContext_allPathsToNode(t *testing.T) { "/path/to/file.txt", }, input: func() input { - tree := NewFileTree() + tree := New() dirTo, err := tree.AddDir("/path/to") require.NoError(t, err) @@ -780,7 +780,7 @@ func Test_searchContext_allPathsToNode(t *testing.T) { "/path/to/file.txt", }, input: func() input { - tree := NewFileTree() + tree := New() dirTo, err := tree.AddDir("/path/to") require.NoError(t, err) @@ -830,7 +830,7 @@ func Test_searchContext_allPathsToNode(t *testing.T) { "/path/to/file.txt", }, input: func() input { - tree := NewFileTree() + tree := New() dirTo, err := tree.AddDir("/path/to") require.NoError(t, err) @@ -893,7 +893,7 @@ func Test_searchContext_allPathsToNode(t *testing.T) { "/path/to/link-to-file", }, input: func() input { - tree := NewFileTree() + tree := New() linkToAnotherViaLinkRef, err := tree.AddSymLink("/path/link-to-another", "/link-to-to/another") require.NoError(t, err) @@ -961,7 +961,7 @@ func Test_searchContext_allPathsToNode(t *testing.T) { "/path/to/link-to-file", }, input: func() input { - tree := NewFileTree() + tree := New() linkToAnotherViaLinkRef, err := tree.AddSymLink("/path/link-to-another", "../link-to-to/another") require.NoError(t, err) diff --git a/pkg/filetree/union_filetree.go b/pkg/filetree/union_filetree.go index 5e30c0a9..c1606a42 100644 --- a/pkg/filetree/union_filetree.go +++ b/pkg/filetree/union_filetree.go @@ -3,28 +3,28 @@ package filetree import "fmt" type UnionFileTree 
struct { - trees []*FileTree + trees []ReadWriter } func NewUnionFileTree() *UnionFileTree { return &UnionFileTree{ - trees: make([]*FileTree, 0), + trees: make([]ReadWriter, 0), } } -func (u *UnionFileTree) PushTree(t *FileTree) { +func (u *UnionFileTree) PushTree(t ReadWriter) { u.trees = append(u.trees, t) } -func (u *UnionFileTree) Squash() (*FileTree, error) { +func (u *UnionFileTree) Squash() (ReadWriter, error) { switch len(u.trees) { case 0: - return NewFileTree(), nil + return New(), nil case 1: return u.trees[0].Copy() } - var squashedTree *FileTree + var squashedTree ReadWriter var err error for layerIdx, refTree := range u.trees { if layerIdx == 0 { @@ -35,7 +35,7 @@ func (u *UnionFileTree) Squash() (*FileTree, error) { continue } - if err = squashedTree.merge(refTree); err != nil { + if err = squashedTree.Merge(refTree); err != nil { return nil, fmt.Errorf("unable to squash layer=%d : %w", layerIdx, err) } } diff --git a/pkg/filetree/union_filetree_test.go b/pkg/filetree/union_filetree_test.go index 594252e1..adbd6103 100644 --- a/pkg/filetree/union_filetree_test.go +++ b/pkg/filetree/union_filetree_test.go @@ -8,7 +8,7 @@ import ( func TestUnionFileTree_Squash(t *testing.T) { ut := NewUnionFileTree() - base := NewFileTree() + base := New() base.AddFile("/home/wagoodman/some/stuff-1.txt") originalNode, _ := base.AddFile("/home/wagoodman/some/stuff-2-overlap.txt") @@ -16,7 +16,7 @@ func TestUnionFileTree_Squash(t *testing.T) { originalMore, _ := base.AddFile("/home/wagoodman/more") originalMoreDir, _ := base.AddDir("/home/wagoodman/moredir") - top := NewFileTree() + top := New() top.AddFile("/etc/redhat-release") // note: override /home/wagoodman/more (a file) as a directory top.AddFile("/home/wagoodman/more/things.txt") @@ -95,13 +95,13 @@ func TestUnionFileTree_Squash(t *testing.T) { func TestUnionFileTree_Squash_whiteout(t *testing.T) { ut := NewUnionFileTree() - base := NewFileTree() + base := New() base.AddFile("/some/stuff-1.txt") 
base.AddFile("/some/stuff-2.txt") base.AddFile("/other/things-1.txt") - top := NewFileTree() + top := New() top.AddFile("/some/" + file.OpaqueWhiteout) top.AddFile("/other/" + file.WhiteoutPrefix + "things-1.txt") diff --git a/pkg/image/content_helpers.go b/pkg/image/content_helpers.go index 92728f93..b38f811a 100644 --- a/pkg/image/content_helpers.go +++ b/pkg/image/content_helpers.go @@ -8,9 +8,9 @@ import ( "github.com/anchore/stereoscope/pkg/filetree" ) -// fetchFileContentsByPath is a common helper function for resolving the file contents for a path from the file +// fetchReaderByPath is a common helper function for resolving the file contents for a path from the file // catalog relative to the given tree. -func fetchFileContentsByPath(ft *filetree.FileTree, fileCatalog *FileCatalog, path file.Path) (io.ReadCloser, error) { +func fetchReaderByPath(ft filetree.Reader, fileCatalog FileCatalogReader, path file.Path) (io.ReadCloser, error) { exists, refVia, err := ft.File(path, filetree.FollowBasenameLinks) if err != nil { return nil, err @@ -19,7 +19,7 @@ func fetchFileContentsByPath(ft *filetree.FileTree, fileCatalog *FileCatalog, pa return nil, fmt.Errorf("could not find file path in Tree: %s", path) } - reader, err := fileCatalog.FileContents(*refVia.Reference) + reader, err := fileCatalog.Open(*refVia.Reference) if err != nil { return nil, err } diff --git a/pkg/image/docker/tarball_provider.go b/pkg/image/docker/tarball_provider.go index 6eb5ce73..663f809e 100644 --- a/pkg/image/docker/tarball_provider.go +++ b/pkg/image/docker/tarball_provider.go @@ -82,5 +82,5 @@ func (p *TarballImageProvider) Provide(_ context.Context, userMetadata ...image. 
return nil, err } - return image.NewImage(img, contentTempDir, metadata...), nil + return image.New(img, contentTempDir, metadata...), nil } diff --git a/pkg/image/file_catalog.go b/pkg/image/file_catalog.go index f9ccd713..e8ec637a 100644 --- a/pkg/image/file_catalog.go +++ b/pkg/image/file_catalog.go @@ -9,6 +9,12 @@ import ( "github.com/anchore/stereoscope/pkg/filetree" ) +type FileCatalogReader interface { + Layer(file.Reference) *Layer + Open(file.Reference) (io.ReadCloser, error) + filetree.IndexReader +} + // FileCatalog represents all file metadata and source tracing for all files contained within the image layer // blobs (i.e. everything except for the image index/manifest/metadata files). type FileCatalog struct { @@ -31,10 +37,13 @@ func NewFileCatalog() *FileCatalog { // Add creates a new FileCatalogEntry for the given file reference and metadata, cataloged by the ID of the // file reference (overwriting any existing entries without warning). func (c *FileCatalog) Add(f file.Reference, m file.Metadata, l *Layer, opener file.Opener) { - c.Index.Add(f, m) + c.Index.Add(f, m) // note: the index is already thread-safe + c.addImageReferences(f.ID(), l, opener) +} + +func (c *FileCatalog) addImageReferences(id file.ID, l *Layer, opener file.Opener) { c.Lock() defer c.Unlock() - id := f.ID() c.layerByID[id] = l c.openerByID[id] = opener } @@ -46,9 +55,9 @@ func (c *FileCatalog) Layer(f file.Reference) *Layer { return c.layerByID[f.ID()] } -// FileContents reads the file contents for the given file reference from the underlying image/layer blob. An error -// is returned if there is no file at the given path and layer or the read operation cannot continue. -func (c *FileCatalog) FileContents(f file.Reference) (io.ReadCloser, error) { +// Open returns a io.ReadCloser for the given file reference. The underlying io.ReadCloser will not attempt to +// allocate resources until the first read is performed. 
+func (c *FileCatalog) Open(f file.Reference) (io.ReadCloser, error) { c.RLock() defer c.RUnlock() diff --git a/pkg/image/file_catalog_test.go b/pkg/image/file_catalog_test.go index e6eddbf4..14af105c 100644 --- a/pkg/image/file_catalog_test.go +++ b/pkg/image/file_catalog_test.go @@ -109,7 +109,7 @@ func (t *testLayerContent) MediaType() (types.MediaType, error) { panic("not implemented") } -func TestFileCatalog_FileContents(t *testing.T) { +func TestFileCatalog_Open(t *testing.T) { fixtureFile := getTarFixture(t, "fixture-1") // a real path & contents from the fixture @@ -141,7 +141,7 @@ func TestFileCatalog_FileContents(t *testing.T) { catalog := NewFileCatalog() catalog.Add(*ref, metadata, layer, opener) - reader, err := catalog.FileContents(*ref) + reader, err := catalog.Open(*ref) require.NoError(t, err) actual, err := io.ReadAll(reader) @@ -205,7 +205,7 @@ func Test_fileExtensions(t *testing.T) { func TestFileCatalog_GetByExtension(t *testing.T) { fixtureTarFile := getTarFixture(t, "fixture-2") - ft := filetree.NewFileTree() + ft := filetree.New() fileCatalog := NewFileCatalog() var size int64 @@ -370,7 +370,7 @@ func TestFileCatalog_GetByExtension(t *testing.T) { func TestFileCatalog_GetByBasename(t *testing.T) { fixtureTarFile := getTarFixture(t, "fixture-2") - ft := filetree.NewFileTree() + ft := filetree.New() fileCatalog := NewFileCatalog() var size int64 @@ -472,7 +472,7 @@ func TestFileCatalog_GetByBasename(t *testing.T) { func TestFileCatalog_GetByBasenameGlob(t *testing.T) { fixtureTarFile := getTarFixture(t, "fixture-2") - ft := filetree.NewFileTree() + ft := filetree.New() fileCatalog := NewFileCatalog() var size int64 @@ -582,7 +582,7 @@ func TestFileCatalog_GetByBasenameGlob(t *testing.T) { func TestFileCatalog_GetByMimeType(t *testing.T) { fixtureTarFile := getTarFixture(t, "fixture-2") - ft := filetree.NewFileTree() + ft := filetree.New() fileCatalog := NewFileCatalog() var size int64 @@ -683,7 +683,7 @@ func TestFileCatalog_GetByMimeType(t 
*testing.T) { func TestFileCatalog_GetBasenames(t *testing.T) { fixtureTarFile := getTarFixture(t, "fixture-2") - ft := filetree.NewFileTree() + ft := filetree.New() fileCatalog := NewFileCatalog() var size int64 diff --git a/pkg/image/image.go b/pkg/image/image.go index e7c3b3cf..208153e1 100644 --- a/pkg/image/image.go +++ b/pkg/image/image.go @@ -30,7 +30,7 @@ type Image struct { // Layers contains the rich layer objects in build order Layers []*Layer // FileCatalog contains all file metadata for all files in all layers - FileCatalog *FileCatalog + FileCatalog FileCatalogReader SquashedSearchContext filetree.Searcher @@ -130,11 +130,16 @@ func WithOS(o string) AdditionalMetadata { } // NewImage provides a new (unread) image object. +// Deprecated: use New() instead func NewImage(image v1.Image, contentCacheDir string, additionalMetadata ...AdditionalMetadata) *Image { + return New(image, contentCacheDir, additionalMetadata...) +} + +// New provides a new (unread) image object. +func New(image v1.Image, contentCacheDir string, additionalMetadata ...AdditionalMetadata) *Image { imgObj := &Image{ image: image, contentCacheDir: contentCacheDir, - FileCatalog: NewFileCatalog(), overrideMetadata: additionalMetadata, } return imgObj @@ -201,9 +206,11 @@ func (i *Image) Read() error { // let consumers know of a monitorable event (image save + copy stages) readProg := i.trackReadProgress(i.Metadata) + fileCatalog := NewFileCatalog() + for idx, v1Layer := range v1Layers { layer := NewLayer(v1Layer) - err := layer.Read(i.FileCatalog, i.Metadata, idx, i.contentCacheDir) + err := layer.Read(fileCatalog, i.Metadata, idx, i.contentCacheDir) if err != nil { return err } @@ -218,7 +225,8 @@ func (i *Image) Read() error { // in order to resolve symlinks all squashed trees must be available err = i.squash(readProg) - i.SquashedSearchContext = filetree.NewSearchContext(i.SquashedTree(), i.FileCatalog.Index) + i.FileCatalog = fileCatalog + i.SquashedSearchContext = 
filetree.NewSearchContext(i.SquashedTree(), i.FileCatalog) return err } @@ -226,11 +234,11 @@ func (i *Image) Read() error { // squash generates a squash tree for each layer in the image. For instance, layer 2 squash = // squash(layer 0, layer 1, layer 2), layer 3 squash = squash(layer 0, layer 1, layer 2, layer 3), and so on. func (i *Image) squash(prog *progress.Manual) error { - var lastSquashTree *filetree.FileTree + var lastSquashTree filetree.ReadWriter for idx, layer := range i.Layers { if idx == 0 { - lastSquashTree = layer.Tree + lastSquashTree = layer.Tree.(filetree.ReadWriter) layer.SquashedTree = layer.Tree layer.SquashedSearchContext = filetree.NewSearchContext(layer.SquashedTree, layer.fileCatalog.Index) continue @@ -238,7 +246,7 @@ func (i *Image) squash(prog *progress.Manual) error { var unionTree = filetree.NewUnionFileTree() unionTree.PushTree(lastSquashTree) - unionTree.PushTree(layer.Tree) + unionTree.PushTree(layer.Tree.(filetree.ReadWriter)) squashedTree, err := unionTree.Squash() if err != nil { @@ -258,11 +266,11 @@ func (i *Image) squash(prog *progress.Manual) error { } // SquashedTree returns the pre-computed image squash file tree. -func (i *Image) SquashedTree() *filetree.FileTree { +func (i *Image) SquashedTree() filetree.Reader { layerCount := len(i.Layers) if layerCount == 0 { - return filetree.NewFileTree() + return filetree.New() } topLayer := i.Layers[layerCount-1] @@ -272,7 +280,7 @@ func (i *Image) SquashedTree() *filetree.FileTree { // FileContentsFromSquash fetches file contents for a single path, relative to the image squash tree. // If the path does not exist an error is returned. func (i *Image) FileContentsFromSquash(path file.Path) (io.ReadCloser, error) { - return fetchFileContentsByPath(i.SquashedTree(), i.FileCatalog, path) + return fetchReaderByPath(i.SquashedTree(), i.FileCatalog, path) } // FilesByMIMETypeFromSquash returns file references for files that match at least one of the given MIME types. 
@@ -291,10 +299,17 @@ func (i *Image) FilesByMIMETypeFromSquash(mimeTypes ...string) ([]file.Reference return refs, nil } +// OpenFile fetches file contents for a single file reference, regardless of the source layer. +// If the path does not exist an error is returned. +func (i *Image) OpenFile(ref file.Reference) (io.ReadCloser, error) { + return i.FileCatalog.Open(ref) +} + // FileContentsByRef fetches file contents for a single file reference, regardless of the source layer. // If the path does not exist an error is returned. +// Deprecated: please use OpenFile() instead. func (i *Image) FileContentsByRef(ref file.Reference) (io.ReadCloser, error) { - return i.FileCatalog.FileContents(ref) + return i.FileCatalog.Open(ref) } // ResolveLinkByLayerSquash resolves a symlink or hardlink for the given file reference relative to the result from diff --git a/pkg/image/image_test.go b/pkg/image/image_test.go index 1030b48d..3a44a626 100644 --- a/pkg/image/image_test.go +++ b/pkg/image/image_test.go @@ -98,7 +98,7 @@ func TestImageAdditionalMetadata(t *testing.T) { os.Remove(tempFile.Name()) }) - img := NewImage(nil, tempFile.Name(), test.options...) + img := New(nil, tempFile.Name(), test.options...) err = img.applyOverrideMetadata() if err != nil { diff --git a/pkg/image/layer.go b/pkg/image/layer.go index fe2bb6ea..4f1f0a6f 100644 --- a/pkg/image/layer.go +++ b/pkg/image/layer.go @@ -32,10 +32,10 @@ type Layer struct { // Metadata contains select layer attributes Metadata LayerMetadata // Tree is a filetree that represents the structure of the layer tar contents ("diff tree") - Tree *filetree.FileTree + Tree filetree.Reader // SquashedTree is a filetree that represents the combination of this layers diff tree and all diff trees // in lower layers relative to this one. 
- SquashedTree *filetree.FileTree + SquashedTree filetree.Reader // fileCatalog contains all file metadata for all files in all layers (not just this layer) fileCatalog *FileCatalog SquashedSearchContext filetree.Searcher @@ -81,7 +81,8 @@ func (l *Layer) uncompressedTarCache(uncompressedLayersCacheDir string) (string, // file tree, and the layer squash tree. func (l *Layer) Read(catalog *FileCatalog, imgMetadata Metadata, idx int, uncompressedLayersCacheDir string) error { var err error - l.Tree = filetree.NewFileTree() + tree := filetree.New() + l.Tree = tree l.fileCatalog = catalog l.Metadata, err = newLayerMetadata(imgMetadata, l.layer, idx) if err != nil { @@ -111,7 +112,7 @@ func (l *Layer) Read(catalog *FileCatalog, imgMetadata Metadata, idx int, uncomp l.indexedContent, err = file.NewTarIndex( tarFilePath, - layerTarIndexer(l.Tree, l.fileCatalog, &l.Metadata.Size, l, monitor), + layerTarIndexer(tree, l.fileCatalog, &l.Metadata.Size, l, monitor), ) if err != nil { return fmt.Errorf("failed to read layer=%q tar : %w", l.Metadata.Digest, err) @@ -126,9 +127,9 @@ func (l *Layer) Read(catalog *FileCatalog, imgMetadata Metadata, idx int, uncomp // Walk the more efficient walk if we're blessed with an io.ReaderAt. if ra, ok := r.(io.ReaderAt); ok { - err = file.WalkSquashFS(ra, l.squashfsVisitor(monitor)) + err = file.WalkSquashFS(ra, squashfsVisitor(tree, l.fileCatalog, &l.Metadata.Size, l, monitor)) } else { - err = file.WalkSquashFSFromReader(r, l.squashfsVisitor(monitor)) + err = file.WalkSquashFSFromReader(r, squashfsVisitor(tree, l.fileCatalog, &l.Metadata.Size, l, monitor)) } if err != nil { return fmt.Errorf("failed to walk layer=%q: %w", l.Metadata.Digest, err) @@ -145,16 +146,30 @@ func (l *Layer) Read(catalog *FileCatalog, imgMetadata Metadata, idx int, uncomp return nil } +// OpenFile reads the file contents for the given path from the underlying layer blob, relative to the layers "diff tree". 
+// An error is returned if there is no file at the given path and layer or the read operation cannot continue. +func (l *Layer) OpenFile(path file.Path) (io.ReadCloser, error) { + return fetchReaderByPath(l.Tree, l.fileCatalog, path) +} + +// OpenFileFromSquash reads the file contents for the given path from the underlying layer blob, relative to the layers squashed file tree. +// An error is returned if there is no file at the given path and layer or the read operation cannot continue. +func (l *Layer) OpenFileFromSquash(path file.Path) (io.ReadCloser, error) { + return fetchReaderByPath(l.SquashedTree, l.fileCatalog, path) +} + // FileContents reads the file contents for the given path from the underlying layer blob, relative to the layers "diff tree". // An error is returned if there is no file at the given path and layer or the read operation cannot continue. +// Deprecated: use OpenFile() instead. func (l *Layer) FileContents(path file.Path) (io.ReadCloser, error) { - return fetchFileContentsByPath(l.Tree, l.fileCatalog, path) + return fetchReaderByPath(l.Tree, l.fileCatalog, path) } // FileContentsFromSquash reads the file contents for the given path from the underlying layer blob, relative to the layers squashed file tree. // An error is returned if there is no file at the given path and layer or the read operation cannot continue. +// Deprecated: use OpenFileFromSquash() instead. func (l *Layer) FileContentsFromSquash(path file.Path) (io.ReadCloser, error) { - return fetchFileContentsByPath(l.SquashedTree, l.fileCatalog, path) + return fetchReaderByPath(l.SquashedTree, l.fileCatalog, path) } // FilesByMIMEType returns file references for files that match at least one of the given MIME types relative to each layer tree. 
@@ -189,7 +204,9 @@ func (l *Layer) FilesByMIMETypeFromSquash(mimeTypes ...string) ([]file.Reference return refs, nil } -func layerTarIndexer(ft *filetree.FileTree, fileCatalog *FileCatalog, size *int64, layerRef *Layer, monitor *progress.Manual) file.TarIndexVisitor { +func layerTarIndexer(ft filetree.Writer, fileCatalog *FileCatalog, size *int64, layerRef *Layer, monitor *progress.Manual) file.TarIndexVisitor { + builder := filetree.NewBuilder(ft, fileCatalog.Index) + return func(index file.TarIndexEntry) error { var err error var entry = index.ToTarFileEntry() @@ -212,37 +229,15 @@ func layerTarIndexer(ft *filetree.FileTree, fileCatalog *FileCatalog, size *int6 // // In summary: the set of all FileTrees can have NON-leaf nodes that don't exist in the FileCatalog, but // the FileCatalog should NEVER have entries that don't appear in one (or more) FileTree(s). - var fileReference *file.Reference - switch metadata.Type { - case file.TypeSymlink: - fileReference, err = ft.AddSymLink(file.Path(metadata.Path), file.Path(metadata.LinkDestination)) - if err != nil { - return err - } - case file.TypeHardLink: - fileReference, err = ft.AddHardLink(file.Path(metadata.Path), file.Path(metadata.LinkDestination)) - if err != nil { - return err - } - case file.TypeDir: - fileReference, err = ft.AddDir(file.Path(metadata.Path)) - if err != nil { - return err - } - default: - fileReference, err = ft.AddFile(file.Path(metadata.Path)) - if err != nil { - return err - } - } - if fileReference == nil { - return fmt.Errorf("could not add path=%q link=%q during tar iteration", metadata.Path, metadata.LinkDestination) + ref, err := builder.Add(metadata) + if err != nil { + return err } if size != nil { *(size) += metadata.Size } - fileCatalog.Add(*fileReference, metadata, layerRef, index.Open) + fileCatalog.addImageReferences(ref.ID(), layerRef, index.Open) if monitor != nil { monitor.N++ @@ -251,7 +246,9 @@ func layerTarIndexer(ft *filetree.FileTree, fileCatalog *FileCatalog, size 
*int6 } } -func (l *Layer) squashfsVisitor(monitor *progress.Manual) file.SquashFSVisitor { +func squashfsVisitor(ft filetree.Writer, fileCatalog *FileCatalog, size *int64, layerRef *Layer, monitor *progress.Manual) file.SquashFSVisitor { + builder := filetree.NewBuilder(ft, fileCatalog.Index) + return func(fsys fs.FS, path string, d fs.DirEntry) error { ff, err := fsys.Open(path) if err != nil { @@ -261,7 +258,7 @@ func (l *Layer) squashfsVisitor(monitor *progress.Manual) file.SquashFSVisitor { f, ok := ff.(*squashfs.File) if !ok { - return errors.New("unexpected file type") + return errors.New("unexpected file type from squashfs") } metadata, err := file.NewMetadataFromSquashFSFile(path, f) @@ -269,32 +266,15 @@ func (l *Layer) squashfsVisitor(monitor *progress.Manual) file.SquashFSVisitor { return err } - var fileReference *file.Reference - - switch { - case f.IsSymlink(): - fileReference, err = l.Tree.AddSymLink(file.Path(metadata.Path), file.Path(metadata.LinkDestination)) - if err != nil { - return err - } - case f.IsDir(): - fileReference, err = l.Tree.AddDir(file.Path(metadata.Path)) - if err != nil { - return err - } - default: - fileReference, err = l.Tree.AddFile(file.Path(metadata.Path)) - if err != nil { - return err - } + fileReference, err := builder.Add(metadata) + if err != nil { + return err } - if fileReference == nil { - return fmt.Errorf("could not add path=%q link=%q during squashfs iteration", metadata.Path, metadata.LinkDestination) + if size != nil { + *(size) += metadata.Size } - - l.Metadata.Size += metadata.Size - l.fileCatalog.Add(*fileReference, metadata, l, func() io.ReadCloser { + fileCatalog.addImageReferences(fileReference.ID(), layerRef, func() io.ReadCloser { r, err := fsys.Open(path) if err != nil { // The file.Opener interface doesn't give us a way to return an error, and callers diff --git a/pkg/image/oci/directory_provider.go b/pkg/image/oci/directory_provider.go index 643b6cb1..08220cb5 100644 --- 
a/pkg/image/oci/directory_provider.go +++ b/pkg/image/oci/directory_provider.go @@ -69,5 +69,5 @@ func (p *DirectoryImageProvider) Provide(_ context.Context, userMetadata ...imag return nil, err } - return image.NewImage(img, contentTempDir, metadata...), nil + return image.New(img, contentTempDir, metadata...), nil } diff --git a/pkg/image/oci/registry_provider.go b/pkg/image/oci/registry_provider.go index 7768a407..52128a62 100644 --- a/pkg/image/oci/registry_provider.go +++ b/pkg/image/oci/registry_provider.go @@ -80,7 +80,7 @@ func (p *RegistryImageProvider) Provide(ctx context.Context, userMetadata ...ima // apply user-supplied metadata last to override any default behavior metadata = append(metadata, userMetadata...) - return image.NewImage(img, imageTempDir, metadata...), nil + return image.New(img, imageTempDir, metadata...), nil } func prepareReferenceOptions(registryOptions image.RegistryOptions) []name.Option { diff --git a/pkg/image/sif/provider.go b/pkg/image/sif/provider.go index 6f6a6738..e30f2033 100644 --- a/pkg/image/sif/provider.go +++ b/pkg/image/sif/provider.go @@ -51,5 +51,5 @@ func (p *SingularityImageProvider) Provide(ctx context.Context, userMetadata ... } metadata = append(metadata, userMetadata...) 
- return image.NewImage(ui, contentCacheDir, metadata...), nil + return image.New(ui, contentCacheDir, metadata...), nil } diff --git a/test/integration/fixture_image_simple_test.go b/test/integration/fixture_image_simple_test.go index 0b13a95f..44260c84 100644 --- a/test/integration/fixture_image_simple_test.go +++ b/test/integration/fixture_image_simple_test.go @@ -177,7 +177,7 @@ func BenchmarkSimpleImage_FetchSquashedContents(b *testing.B) { b.Run(c.source, func(b *testing.B) { for i := 0; i < b.N; i++ { for _, ref := range paths { - f, err := img.FileCatalog.FileContents(ref) + f, err := img.FileCatalog.Open(ref) if err != nil { b.Fatalf("unable to read: %+v", err) } @@ -230,20 +230,20 @@ func assertImageSimpleSquashedTrees(t *testing.T, i *image.Image) { t.Helper() //t.Log("Asserting squashed trees...") - one := filetree.NewFileTree() + one := filetree.New() one.AddFile("/somefile-1.txt") - two := filetree.NewFileTree() + two := filetree.New() two.AddFile("/somefile-1.txt") two.AddFile("/somefile-2.txt") - three := filetree.NewFileTree() + three := filetree.New() three.AddFile("/somefile-1.txt") three.AddFile("/somefile-2.txt") three.AddFile("/really/.wh..wh..opq") three.AddFile("/really/nested/file-3.txt") - expectedTrees := map[uint]*filetree.FileTree{ + expectedTrees := map[uint]filetree.Reader{ 0: one, 1: two, 2: three, @@ -257,7 +257,7 @@ func assertImageSimpleSquashedTrees(t *testing.T, i *image.Image) { compareLayerSquashTrees(t, expectedTrees, i, ignorePaths) - squashed := filetree.NewFileTree() + squashed := filetree.New() squashed.AddFile("/somefile-1.txt") squashed.AddFile("/somefile-2.txt") squashed.AddFile("/really/nested/file-3.txt") @@ -269,17 +269,17 @@ func assertImageSimpleTrees(t *testing.T, i *image.Image) { t.Helper() //t.Log("Asserting trees...") - one := filetree.NewFileTree() + one := filetree.New() one.AddFile("/somefile-1.txt") - two := filetree.NewFileTree() + two := filetree.New() two.AddFile("/somefile-2.txt") - three := 
filetree.NewFileTree() + three := filetree.New() three.AddFile("/really/.wh..wh..opq") three.AddFile("/really/nested/file-3.txt") - expectedTrees := map[uint]*filetree.FileTree{ + expectedTrees := map[uint]filetree.Reader{ 0: one, 1: two, 2: three, diff --git a/test/integration/fixture_image_symlinks_test.go b/test/integration/fixture_image_symlinks_test.go index 39ad6304..b72737bd 100644 --- a/test/integration/fixture_image_symlinks_test.go +++ b/test/integration/fixture_image_symlinks_test.go @@ -136,7 +136,7 @@ func fetchRefs(t *testing.T, i *image.Image, cfg linkFetchConfig) (*file.Referen } func fetchContents(t *testing.T, i *image.Image, cfg linkFetchConfig) string { - contents, err := i.Layers[cfg.perspectiveLayer].FileContentsFromSquash(file.Path(cfg.linkPath)) + contents, err := i.Layers[cfg.perspectiveLayer].OpenFileFromSquash(file.Path(cfg.linkPath)) if err != nil { t.Fatalf("could not fetch contents of %+v: %+v", cfg.linkPath, err) } diff --git a/test/integration/utils_test.go b/test/integration/utils_test.go index 6ca7b53a..4f9fa268 100644 --- a/test/integration/utils_test.go +++ b/test/integration/utils_test.go @@ -9,13 +9,13 @@ import ( "github.com/anchore/stereoscope/pkg/image" ) -func compareLayerSquashTrees(t *testing.T, expected map[uint]*filetree.FileTree, i *image.Image, ignorePaths []file.Path) { +func compareLayerSquashTrees(t *testing.T, expected map[uint]filetree.Reader, i *image.Image, ignorePaths []file.Path) { t.Helper() if len(expected) != len(i.Layers) { t.Fatalf("mismatched layers (%d!=%d)", len(expected), len(i.Layers)) } - var actual = make([]*filetree.FileTree, 0) + var actual = make([]filetree.Reader, 0) for _, l := range i.Layers { actual = append(actual, l.SquashedTree) } @@ -23,13 +23,13 @@ func compareLayerSquashTrees(t *testing.T, expected map[uint]*filetree.FileTree, compareTrees(t, expected, actual, ignorePaths) } -func compareLayerTrees(t *testing.T, expected map[uint]*filetree.FileTree, i *image.Image, ignorePaths 
[]file.Path) { +func compareLayerTrees(t *testing.T, expected map[uint]filetree.Reader, i *image.Image, ignorePaths []file.Path) { t.Helper() if len(expected) != len(i.Layers) { t.Fatalf("mismatched layers (%d!=%d)", len(expected), len(i.Layers)) } - var actual = make([]*filetree.FileTree, 0) + var actual = make([]filetree.Reader, 0) for _, l := range i.Layers { actual = append(actual, l.Tree) } @@ -37,13 +37,13 @@ func compareLayerTrees(t *testing.T, expected map[uint]*filetree.FileTree, i *im compareTrees(t, expected, actual, ignorePaths) } -func compareTrees(t *testing.T, expected map[uint]*filetree.FileTree, actual []*filetree.FileTree, ignorePaths []file.Path) { +func compareTrees(t *testing.T, expected map[uint]filetree.Reader, actual []filetree.Reader, ignorePaths []file.Path) { t.Helper() - for idx, expected := range expected { - actual := actual[idx] - if !expected.Equal(actual) { - extra, missing := expected.PathDiff(actual) + for idx, e := range expected { + a := actual[idx] + if !e.(*filetree.FileTree).Equal(a.(*filetree.FileTree)) { + extra, missing := e.(*filetree.FileTree).PathDiff(a.(*filetree.FileTree)) nonIgnoredPaths := 0 for _, p := range extra { @@ -82,11 +82,11 @@ func compareTrees(t *testing.T, expected map[uint]*filetree.FileTree, actual []* } } -func compareSquashTree(t *testing.T, expected *filetree.FileTree, i *image.Image) { +func compareSquashTree(t *testing.T, expected filetree.Reader, i *image.Image) { t.Helper() actual := i.SquashedTree() - if !expected.Equal(actual) { + if !expected.(*filetree.FileTree).Equal(actual.(*filetree.FileTree)) { t.Log("Walking expected squashed tree:") err := expected.Walk(func(p file.Path, _ filenode.FileNode) error { t.Log(" ", p) @@ -105,7 +105,7 @@ func compareSquashTree(t *testing.T, expected *filetree.FileTree, i *image.Image t.Fatalf("failed to walk tree: %+v", err) } - extra, missing := expected.PathDiff(actual) + extra, missing := 
expected.(*filetree.FileTree).PathDiff(actual.(*filetree.FileTree)) t.Errorf("path differences: extra=%+v missing=%+v", extra, missing) t.Errorf("mismatched squashed trees") } From 6b109bbf47aaa69808ccfe6d977f621bf97f912d Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Fri, 3 Feb 2023 17:52:00 -0500 Subject: [PATCH 22/35] rename content helper functions Signed-off-by: Alex Goodman --- examples/basic.go | 2 +- pkg/image/image.go | 13 ++++++-- pkg/image/layer.go | 12 +++---- .../fixture_image_symlinks_test.go | 32 ++++++------------- 4 files changed, 27 insertions(+), 32 deletions(-) diff --git a/examples/basic.go b/examples/basic.go index 23d3883a..687245b9 100644 --- a/examples/basic.go +++ b/examples/basic.go @@ -89,7 +89,7 @@ func main() { ////////////////////////////////////////////////////////////////// // Fetch file contents from the (squashed) image filePath := file.Path("/etc/group") - contentReader, err := image.FileContentsFromSquash(filePath) + contentReader, err := image.OpenPathFromSquash(filePath) if err != nil { panic(err) } diff --git a/pkg/image/image.go b/pkg/image/image.go index 208153e1..b415ca41 100644 --- a/pkg/image/image.go +++ b/pkg/image/image.go @@ -277,8 +277,15 @@ func (i *Image) SquashedTree() filetree.Reader { return topLayer.SquashedTree } +// OpenPathFromSquash fetches file contents for a single path, relative to the image squash tree. +// If the path does not exist an error is returned. +func (i *Image) OpenPathFromSquash(path file.Path) (io.ReadCloser, error) { + return fetchReaderByPath(i.SquashedTree(), i.FileCatalog, path) +} + // FileContentsFromSquash fetches file contents for a single path, relative to the image squash tree. // If the path does not exist an error is returned. +// Deprecated: use OpenPathFromSquash() instead. 
func (i *Image) FileContentsFromSquash(path file.Path) (io.ReadCloser, error) { return fetchReaderByPath(i.SquashedTree(), i.FileCatalog, path) } @@ -299,15 +306,15 @@ func (i *Image) FilesByMIMETypeFromSquash(mimeTypes ...string) ([]file.Reference return refs, nil } -// OpenFile fetches file contents for a single file reference, regardless of the source layer. +// OpenReference fetches file contents for a single file reference, regardless of the source layer. // If the path does not exist an error is returned. -func (i *Image) OpenFile(ref file.Reference) (io.ReadCloser, error) { +func (i *Image) OpenReference(ref file.Reference) (io.ReadCloser, error) { return i.FileCatalog.Open(ref) } // FileContentsByRef fetches file contents for a single file reference, regardless of the source layer. // If the path does not exist an error is returned. -// Deprecated: please use OpenFile() instead. +// Deprecated: please use OpenReference() instead. func (i *Image) FileContentsByRef(ref file.Reference) (io.ReadCloser, error) { return i.FileCatalog.Open(ref) } diff --git a/pkg/image/layer.go b/pkg/image/layer.go index 4f1f0a6f..f0b1267d 100644 --- a/pkg/image/layer.go +++ b/pkg/image/layer.go @@ -146,28 +146,28 @@ func (l *Layer) Read(catalog *FileCatalog, imgMetadata Metadata, idx int, uncomp return nil } -// OpenFile reads the file contents for the given path from the underlying layer blob, relative to the layers "diff tree". +// OpenPath reads the file contents for the given path from the underlying layer blob, relative to the layers "diff tree". // An error is returned if there is no file at the given path and layer or the read operation cannot continue. 
-func (l *Layer) OpenFile(path file.Path) (io.ReadCloser, error) { +func (l *Layer) OpenPath(path file.Path) (io.ReadCloser, error) { return fetchReaderByPath(l.Tree, l.fileCatalog, path) } -// OpenFileFromSquash reads the file contents for the given path from the underlying layer blob, relative to the layers squashed file tree. +// OpenPathFromSquash reads the file contents for the given path from the underlying layer blob, relative to the layers squashed file tree. // An error is returned if there is no file at the given path and layer or the read operation cannot continue. -func (l *Layer) OpenFileFromSquash(path file.Path) (io.ReadCloser, error) { +func (l *Layer) OpenPathFromSquash(path file.Path) (io.ReadCloser, error) { return fetchReaderByPath(l.SquashedTree, l.fileCatalog, path) } // FileContents reads the file contents for the given path from the underlying layer blob, relative to the layers "diff tree". // An error is returned if there is no file at the given path and layer or the read operation cannot continue. -// Deprecated: use OpenFile() instead. +// Deprecated: use OpenPath() instead. func (l *Layer) FileContents(path file.Path) (io.ReadCloser, error) { return fetchReaderByPath(l.Tree, l.fileCatalog, path) } // FileContentsFromSquash reads the file contents for the given path from the underlying layer blob, relative to the layers squashed file tree. // An error is returned if there is no file at the given path and layer or the read operation cannot continue. -// Deprecated: use OpenFileFromSquash() instead. +// Deprecated: use OpenPathFromSquash() instead. 
func (l *Layer) FileContentsFromSquash(path file.Path) (io.ReadCloser, error) { return fetchReaderByPath(l.SquashedTree, l.fileCatalog, path) } diff --git a/test/integration/fixture_image_symlinks_test.go b/test/integration/fixture_image_symlinks_test.go index b72737bd..684af08f 100644 --- a/test/integration/fixture_image_symlinks_test.go +++ b/test/integration/fixture_image_symlinks_test.go @@ -5,6 +5,7 @@ package integration import ( "fmt" + "github.com/stretchr/testify/require" "io" "testing" @@ -113,37 +114,24 @@ func assertMatch(t *testing.T, i *image.Image, cfg linkFetchConfig, expectedReso func fetchRefs(t *testing.T, i *image.Image, cfg linkFetchConfig) (*file.Reference, *file.Reference) { _, link, err := i.Layers[cfg.linkLayer].Tree.File(file.Path(cfg.linkPath), cfg.linkOptions...) - if err != nil { - t.Fatalf("unable to get link: %+v", err) - } - if link == nil { - t.Fatalf("missing expected link: %s", cfg.linkPath) - } + require.NoError(t, err) + require.NotNil(t, link) _, expectedResolve, err := i.Layers[cfg.resolveLayer].Tree.File(file.Path(cfg.expectedPath), cfg.linkOptions...) - if err != nil { - t.Fatalf("unable to get resolved link: %+v", err) - } - if expectedResolve == nil { - t.Fatalf("missing expected path: %s", expectedResolve) - } + require.NoError(t, err) + require.NotNil(t, expectedResolve) actualResolve, err := i.ResolveLinkByLayerSquash(*link.Reference, cfg.perspectiveLayer, cfg.linkOptions...) 
- if err != nil { - t.Fatalf("failed to resolve link=%+v: %+v", link, err) - } + require.NoError(t, err) return expectedResolve.Reference, actualResolve.Reference } func fetchContents(t *testing.T, i *image.Image, cfg linkFetchConfig) string { - contents, err := i.Layers[cfg.perspectiveLayer].OpenFileFromSquash(file.Path(cfg.linkPath)) - if err != nil { - t.Fatalf("could not fetch contents of %+v: %+v", cfg.linkPath, err) - } + contents, err := i.Layers[cfg.perspectiveLayer].OpenPathFromSquash(file.Path(cfg.linkPath)) + require.NoError(t, err) + b, err := io.ReadAll(contents) - if err != nil { - t.Fatalf("unable to fetch contents for %+v : %+v", cfg, err) - } + require.NoError(t, err) return string(b) } From a2eecb60297ba149bd39714736eb977d528a1182 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Sat, 4 Feb 2023 10:26:52 -0500 Subject: [PATCH 23/35] update docs with background Signed-off-by: Alex Goodman --- DEVELOPING.md | 52 ++++++++++++++++++++++++++++++++++++ pkg/file/reference_access.go | 3 ++- pkg/filetree/builder.go | 3 ++- 3 files changed, 56 insertions(+), 2 deletions(-) create mode 100644 DEVELOPING.md diff --git a/DEVELOPING.md b/DEVELOPING.md new file mode 100644 index 00000000..daa5551d --- /dev/null +++ b/DEVELOPING.md @@ -0,0 +1,52 @@ +# Developing + +## Getting started + +In order to test and develop in this repo you will need the following dependencies installed: +- Golang +- docker +- make + +After cloning the following step can help you get setup: +1. run `make bootstrap` to download go mod dependencies, create the `/.tmp` dir, and download helper utilities. +2. run `make help` to view the selection of developer commands in the Makefile + +The main make tasks for common static analysis and testing are `lint`, `lint-fix`, `unit`, and `integration`. + +See `make help` for all the current make tasks. + +## Background + +Stereoscope is a library for reading and manipulating container images. 
It is capable of parsing multiple image +sources, providing a single abstraction for interacting with them. Ultimately this provides a squashfs-like +interface for interacting with image layers as well as a content API for accessing files contained within +the image. + +**Overview of objects:** +- `image.Image`: Once parsed with `image.Read()` this object represents a container image. Consists of a sequence of `image.Layer` objects, a `image.FileCatalog` for accessing files, and `filetree.SearchContext` for searching for files from the squashed representation of the image filesystem. Additionally exposes GGCR `v1.Image` objects for accessing the raw image metadata. +- `image.Layer`: represents a single layer of the image. Consists of a `filetree.FileTree` that represents the raw layer contents, and a `filetree.SearchContext` for searching for files relative to the raw (single layer) filetree as well as the squashed representation of the layer relative to all layers below this one. Additionally exposes GGCR `v1.Layer` objects for accessing the raw layer metadata. +- `filetree.FileTree`: a tree representing a filesystem. All nodes represent real paths (paths with no link resolution anywhere in the path) and are absolute paths (start with / and contain no relative path elements [e.g. ../ or ./]). This represents the filesystem structure and each node has a reference to the file metadata for that path. +- `file.Reference`: a unique file in the filesystem, identified by an absolute, real path as well as an integer ID (`file.ID`s). These are used to reference concrete nodes in the `filetree.FileTree` and `image.FileCatalog` objects. +- `file.Index`: stores all known `file.Reference` and `file.Metadata`. Entries are indexed with a variety of ways to provide fast access to references and metadata without needing to crawl the tree. This is especially useful for speeding up globbing. 
+- `image.FileCatalog`: an image-aware extension of `file.Index` that additionally relates `image.Layers` to `file.IDs` and provides a content API for any files contained within the image (regardless of which layer or squashed representation it exists in). + +### Searching for files + +Searching for files is exposed to users in three ways: +- search by file path +- search by file glob +- search by file content MIME type + +Searching itself is performed two different ways: +- search the `image.FileCatalog` on the image by a heuristic +- search the `filetree.FileTree` directly + +The "best way" to search is automatically determined in the `filetree.searchContext` object, exposed on `image.Image` and `image.Layer` objects as a `filetree.Searcher` for general use. + +### File trees + +The `filetree.FileTree` object represents a filesystem and consists of `filenode.Node` objects. The tree itself leverages `tree.Tree` as a generic datastructure. What `filetree.FileTree` adds is the concept of file types, the semantics of each type, the ability to resolve links based on a given strategy, merging of trees with the same semantics of a union filesystem (e.g. whiteout files), and the ability to search for files via direct paths or globs. + +The `fs.FS` abstraction has been implemented on `filetree.FileTree` to allow for easy integration with the standard library as well as to interop with the `doublestar` library to facilitate globing. Using the `fs.FS` abstraction for filetree operations is faster than OS interactions with the filesystem directly but relatively slower than the indexes provided by `image.FileCatalog` and `file.Index`. + +`filetre.FileTree` objects can be created with a corresponding `file.Index` object by leveraging the `filetree.Builder` object, which aids in the indexing of files. 
diff --git a/pkg/file/reference_access.go b/pkg/file/reference_access.go index 8017e88f..a83b97dd 100644 --- a/pkg/file/reference_access.go +++ b/pkg/file/reference_access.go @@ -20,7 +20,8 @@ type ReferenceAccessVia struct { type ReferenceAccessVias []ReferenceAccessVia -// NewFileReferenceVia shows how a reference was accessed. +// NewFileReferenceVia create a new ReferenceAccessVia for the given request path, showing the resolved reference (or +// nil if it does not exist), and the link resolution of the basename of the request path transitively. func NewFileReferenceVia(path Path, ref *Reference, leafs []ReferenceAccess) *ReferenceAccessVia { return &ReferenceAccessVia{ ReferenceAccess: ReferenceAccess{ diff --git a/pkg/filetree/builder.go b/pkg/filetree/builder.go index 36d94be6..c22b3b27 100644 --- a/pkg/filetree/builder.go +++ b/pkg/filetree/builder.go @@ -2,6 +2,7 @@ package filetree import ( "fmt" + "github.com/anchore/stereoscope/pkg/file" ) @@ -48,6 +49,6 @@ func (b *Builder) Add(metadata file.Metadata) (*file.Reference, error) { } b.index.Add(*ref, metadata) - + return ref, nil } From 1ec3d77533156b5120d2ad4bec5aaebf5bb7c208 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Sun, 5 Feb 2023 10:02:52 -0500 Subject: [PATCH 24/35] fix get_xid for cross compilation Signed-off-by: Alex Goodman --- pkg/file/get_xid.go | 21 +++++++++++++++++++++ pkg/file/get_xid_win.go | 13 +++++++++++++ pkg/file/metadata.go | 16 +--------------- 3 files changed, 35 insertions(+), 15 deletions(-) create mode 100644 pkg/file/get_xid.go create mode 100644 pkg/file/get_xid_win.go diff --git a/pkg/file/get_xid.go b/pkg/file/get_xid.go new file mode 100644 index 00000000..b59eb795 --- /dev/null +++ b/pkg/file/get_xid.go @@ -0,0 +1,21 @@ +//go:build linux || darwin || netbsd +// +build linux darwin netbsd + +package file + +import ( + "os" + "syscall" +) + +// getXid is the UID GID system info for unix +func getXid(info os.FileInfo) (uid, gid int) { + uid = -1 + gid = -1 + if stat, 
ok := info.Sys().(*syscall.Stat_t); ok { + uid = int(stat.Uid) + gid = int(stat.Gid) + } + + return uid, gid +} diff --git a/pkg/file/get_xid_win.go b/pkg/file/get_xid_win.go new file mode 100644 index 00000000..91083371 --- /dev/null +++ b/pkg/file/get_xid_win.go @@ -0,0 +1,13 @@ +//go:build windows +// +build windows + +package file + +import ( + "os" +) + +// getXid is a placeholder for windows file information +func getXid(info os.FileInfo) (uid, gid int) { + return -1, -1 +} diff --git a/pkg/file/metadata.go b/pkg/file/metadata.go index 7ba0014f..5ba884af 100644 --- a/pkg/file/metadata.go +++ b/pkg/file/metadata.go @@ -2,13 +2,11 @@ package file import ( "archive/tar" + "github.com/anchore/stereoscope/internal/log" "io" "os" "path" "path/filepath" - "syscall" - - "github.com/anchore/stereoscope/internal/log" "github.com/sylabs/squashfs" ) @@ -126,15 +124,3 @@ func NewMetadataFromPath(path string, info os.FileInfo) Metadata { IsDir: info.IsDir(), } } - -// getXid is the UID GID system info for unix -func getXid(info os.FileInfo) (uid, gid int) { - uid = -1 - gid = -1 - if stat, ok := info.Sys().(*syscall.Stat_t); ok { - uid = int(stat.Uid) - gid = int(stat.Gid) - } - - return uid, gid -} From 33fb3e2e3ed652f9698482c0c84dcfa61c1d4e6a Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Sun, 5 Feb 2023 10:03:30 -0500 Subject: [PATCH 25/35] upgrade CI validations workflow Signed-off-by: Alex Goodman --- .github/actions/bootstrap/action.yaml | 81 +++++++++++ .github/scripts/build.sh | 80 +++++++++++ .github/scripts/coverage.py | 36 +++++ .github/scripts/go-mod-tidy-check.sh | 30 ++++ .github/workflows/benchmark-testing.yaml | 58 ++++++++ .github/workflows/validations.yaml | 173 +++-------------------- .gitignore | 1 + Makefile | 138 +++++++++++------- pkg/file/metadata.go | 3 +- pkg/file/type.go | 2 +- 10 files changed, 396 insertions(+), 206 deletions(-) create mode 100644 .github/actions/bootstrap/action.yaml create mode 100755 .github/scripts/build.sh create mode 
100755 .github/scripts/coverage.py create mode 100755 .github/scripts/go-mod-tidy-check.sh create mode 100644 .github/workflows/benchmark-testing.yaml diff --git a/.github/actions/bootstrap/action.yaml b/.github/actions/bootstrap/action.yaml new file mode 100644 index 00000000..5544402d --- /dev/null +++ b/.github/actions/bootstrap/action.yaml @@ -0,0 +1,81 @@ +name: "Bootstrap" +description: "Bootstrap all tools and dependencies" +inputs: + go-version: + description: "Go version to install" + required: true + default: "1.19.x" + use-go-cache: + description: "Restore go cache" + required: true + default: "true" + cache-key-prefix: + description: "Prefix all cache keys with this value" + required: true + default: "831180ac25" + build-cache-key-prefix: + description: "Prefix build cache key with this value" + required: true + default: "f8b6d31dea" + bootstrap-apt-packages: + description: "Space delimited list of tools to install via apt" + default: "" + +runs: + using: "composite" + steps: + - uses: actions/setup-go@v3 + with: + go-version: ${{ inputs.go-version }} + + - name: Restore tool cache + id: tool-cache + uses: actions/cache@v3 + with: + path: ${{ github.workspace }}/.tmp + key: ${{ inputs.cache-key-prefix }}-${{ runner.os }}-tool-${{ hashFiles('Makefile') }} + + # note: we need to keep restoring the go mod cache before bootstrapping tools since `go install` is used in + # some installations of project tools. 
+ - name: Restore go module cache + id: go-mod-cache + if: inputs.use-go-cache == 'true' + uses: actions/cache@v3 + with: + path: | + ~/go/pkg/mod + key: ${{ inputs.cache-key-prefix }}-${{ runner.os }}-go-${{ inputs.go-version }}-${{ hashFiles('**/go.sum') }} + restore-keys: | + ${{ inputs.cache-key-prefix }}-${{ runner.os }}-go-${{ inputs.go-version }}- + + - name: (cache-miss) Bootstrap project tools + shell: bash + if: steps.tool-cache.outputs.cache-hit != 'true' + run: make bootstrap-tools + + - name: Restore go build cache + id: go-cache + if: inputs.use-go-cache == 'true' + uses: actions/cache@v3 + with: + path: | + ~/.cache/go-build + key: ${{ inputs.cache-key-prefix }}-${{ inputs.build-cache-key-prefix }}-${{ runner.os }}-go-${{ inputs.go-version }}-${{ hashFiles('**/go.sum') }} + restore-keys: | + ${{ inputs.cache-key-prefix }}-${{ inputs.build-cache-key-prefix }}-${{ runner.os }}-go-${{ inputs.go-version }}- + + - name: (cache-miss) Bootstrap go dependencies + shell: bash + if: steps.go-mod-cache.outputs.cache-hit != 'true' && inputs.use-go-cache == 'true' + run: make bootstrap-go + + - name: Bootstrap CI dependencies + shell: bash + run: make ci-bootstrap + + - name: Install apt packages + if: inputs.bootstrap-apt-packages != '' + shell: bash + run: | + DEBIAN_FRONTEND=noninteractive sudo apt update && sudo -E apt install -y ${{ inputs.bootstrap-apt-packages }} + diff --git a/.github/scripts/build.sh b/.github/scripts/build.sh new file mode 100755 index 00000000..18f13cb4 --- /dev/null +++ b/.github/scripts/build.sh @@ -0,0 +1,80 @@ +#!/usr/bin/env bash +set -uo pipefail + +# Based on https://gist.github.com/eduncan911/68775dba9d3c028181e4 and https://gist.github.com/makeworld-the-better-one/e1bb127979ae4195f43aaa3ad46b1097 +# but improved to use the `go` command so it never goes out of date. 
+ +type setopt >/dev/null 2>&1 + +contains() { + # Source: https://stackoverflow.com/a/8063398/7361270 + [[ $1 =~ (^|[[:space:]])$2($|[[:space:]]) ]] +} + +mkdir -p snapshot +rm -f snapshot/* + +OUTPUT=snapshot/stereoscope-example +FAILURES="" + +# You can set your own flags on the command line +FLAGS=${FLAGS:-"-ldflags=\"-s -w\""} + +# A list of OSes and architectures to not build for, space-separated +# It can be set from the command line when the script is called. +NOT_ALLOWED_OS=${NOT_ALLOWED_OS:-"js android ios solaris illumos aix dragonfly plan9"} +NOT_ALLOWED_ARCH=${NOT_ALLOWED_ARCH:-"riscv64 mips mips64 mips64le ppc64 ppc64le s390x wasm"} + + +# Get all targets +while IFS= read -r target; do + GOOS=${target%/*} + GOARCH=${target#*/} + BIN_FILENAME="${OUTPUT}-${GOOS}-${GOARCH}" + + if contains "$NOT_ALLOWED_OS" "$GOOS" ; then + continue + fi + + if contains "$NOT_ALLOWED_ARCH" "$GOARCH" ; then + continue + fi + + # Check for arm and set arm version + if [[ $GOARCH == "arm" ]]; then + # Set what arm versions each platform supports + if [[ $GOOS == "darwin" ]]; then + arms="7" + elif [[ $GOOS == "windows" ]]; then + # This is a guess, it's not clear what Windows supports from the docs + # But I was able to build all these on my machine + arms="5 6 7" + elif [[ $GOOS == *"bsd" ]]; then + arms="6 7" + else + # Linux goes here + arms="5 6 7" + fi + + # Now do the arm build + for GOARM in $arms; do + BIN_FILENAME="${OUTPUT}-${GOOS}-${GOARCH}${GOARM}" + if [[ "${GOOS}" == "windows" ]]; then BIN_FILENAME="${BIN_FILENAME}.exe"; fi + CMD="GOARM=${GOARM} GOOS=${GOOS} GOARCH=${GOARCH} go build $FLAGS -o ${BIN_FILENAME} $@" + echo "${CMD}" + eval "${CMD}" || FAILURES="${FAILURES} ${GOOS}/${GOARCH}${GOARM}" + done + else + # Build non-arm here + if [[ "${GOOS}" == "windows" ]]; then BIN_FILENAME="${BIN_FILENAME}.exe"; fi + CMD="GOOS=${GOOS} GOARCH=${GOARCH} go build $FLAGS -o ${BIN_FILENAME} $@" + echo "${CMD}" + eval "${CMD}" || FAILURES="${FAILURES} ${GOOS}/${GOARCH}" + 
fi +done <<< "$(go tool dist list)" + +if [[ "${FAILURES}" != "" ]]; then + echo "" + echo "build failed for: ${FAILURES}" + exit 1 +fi \ No newline at end of file diff --git a/.github/scripts/coverage.py b/.github/scripts/coverage.py new file mode 100755 index 00000000..db14135c --- /dev/null +++ b/.github/scripts/coverage.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 +import subprocess +import sys +import shlex + + +class bcolors: + HEADER = '\033[95m' + OKBLUE = '\033[94m' + OKCYAN = '\033[96m' + OKGREEN = '\033[92m' + WARNING = '\033[93m' + FAIL = '\033[91m' + ENDC = '\033[0m' + BOLD = '\033[1m' + UNDERLINE = '\033[4m' + + +if len(sys.argv) < 3: + print("Usage: coverage.py [threshold] [go-coverage-report]") + sys.exit(1) + + +threshold = float(sys.argv[1]) +report = sys.argv[2] + + +args = shlex.split(f"go tool cover -func {report}") +p = subprocess.run(args, capture_output=True, text=True) + +percent_coverage = float(p.stdout.splitlines()[-1].split()[-1].replace("%", "")) +print(f"{bcolors.BOLD}Coverage: {percent_coverage}%{bcolors.ENDC}") + +if percent_coverage < threshold: + print(f"{bcolors.BOLD}{bcolors.FAIL}Coverage below threshold of {threshold}%{bcolors.ENDC}") + sys.exit(1) diff --git a/.github/scripts/go-mod-tidy-check.sh b/.github/scripts/go-mod-tidy-check.sh new file mode 100755 index 00000000..28f22fcd --- /dev/null +++ b/.github/scripts/go-mod-tidy-check.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +set -eu + +ORIGINAL_STATE_DIR=$(mktemp -d "TEMP-original-state-XXXXXXXXX") +TIDY_STATE_DIR=$(mktemp -d "TEMP-tidy-state-XXXXXXXXX") + +trap "cp -p ${ORIGINAL_STATE_DIR}/* ./ && git update-index -q --refresh && rm -fR ${ORIGINAL_STATE_DIR} ${TIDY_STATE_DIR}" EXIT + +# capturing original state of files... +cp go.mod go.sum "${ORIGINAL_STATE_DIR}" + +# capturing state of go.mod and go.sum after running go mod tidy... 
+go mod tidy +cp go.mod go.sum "${TIDY_STATE_DIR}" + +set +e + +# detect difference between the git HEAD state and the go mod tidy state +DIFF_MOD=$(diff -u "${ORIGINAL_STATE_DIR}/go.mod" "${TIDY_STATE_DIR}/go.mod") +DIFF_SUM=$(diff -u "${ORIGINAL_STATE_DIR}/go.sum" "${TIDY_STATE_DIR}/go.sum") + +if [[ -n "${DIFF_MOD}" || -n "${DIFF_SUM}" ]]; then + echo "go.mod diff:" + echo "${DIFF_MOD}" + echo "go.sum diff:" + echo "${DIFF_SUM}" + echo "" + printf "FAILED! go.mod and/or go.sum are NOT tidy; please run 'go mod tidy'.\n\n" + exit 1 +fi diff --git a/.github/workflows/benchmark-testing.yaml b/.github/workflows/benchmark-testing.yaml new file mode 100644 index 00000000..4cd87594 --- /dev/null +++ b/.github/workflows/benchmark-testing.yaml @@ -0,0 +1,58 @@ +name: "Benchmark testing" + +on: + workflow_dispatch: + pull_request: + +jobs: + + Benchmark-Test: + name: "Benchmark tests" + runs-on: ubuntu-20.04 + # note: we want benchmarks to run on pull_request events in order to publish results to a sticky comment, and + # we also want to run on push such that merges to main are recorded to the cache. For this reason we don't filter + # the job by event. 
+ steps: + - uses: actions/checkout@v3 + + - name: Bootstrap environment + uses: ./.github/actions/bootstrap + + - name: Restore base benchmark result + uses: actions/cache@v3 + with: + path: test/results/benchmark-main.txt + # use base sha for PR or new commit hash for main push in benchmark result key + key: ${{ runner.os }}-bench-${{ (github.event.pull_request.base.sha != github.event.after) && github.event.pull_request.base.sha || github.event.after }} + + - name: Run benchmark tests + id: benchmark + run: | + REF_NAME=${GITHUB_REF##*/} make benchmark + OUTPUT=$(make show-benchstat) + OUTPUT="${OUTPUT//'%'/'%25'}" # URL encode all '%' characters + OUTPUT="${OUTPUT//$'\n'/'%0A'}" # URL encode all '\n' characters + OUTPUT="${OUTPUT//$'\r'/'%0D'}" # URL encode all '\r' characters + echo "::set-output name=result::$OUTPUT" + + - uses: actions/upload-artifact@v3 + with: + name: benchmark-test-results + path: test/results/**/* + + - name: Update PR benchmark results comment + uses: marocchino/sticky-pull-request-comment@v2 + continue-on-error: true + with: + header: benchmark + message: | + ### Benchmark Test Results + +
+ Benchmark results from the latest changes vs base branch + + ``` + ${{ steps.benchmark.outputs.result }} + ``` + +
diff --git a/.github/workflows/validations.yaml b/.github/workflows/validations.yaml index 4ed98466..f0314a6b 100644 --- a/.github/workflows/validations.yaml +++ b/.github/workflows/validations.yaml @@ -16,79 +16,28 @@ on: - main pull_request: -env: - GO_VERSION: "1.19.x" - jobs: Static-Analysis: name: "Static analysis" runs-on: ubuntu-20.04 steps: - - uses: actions/setup-go@v2 - with: - go-version: ${{ env.GO_VERSION }} - - - uses: actions/checkout@v2 - - - name: Restore tool cache - id: tool-cache - uses: actions/cache@v2.1.3 - with: - path: ${{ github.workspace }}/.tmp - key: ${{ runner.os }}-tool-${{ hashFiles('Makefile') }} + - uses: actions/checkout@v3 - - name: Restore go cache - id: go-cache - uses: actions/cache@v2.1.3 - with: - path: ~/go/pkg/mod - key: ${{ runner.os }}-go-${{ env.GO_VERSION }}-${{ hashFiles('**/go.sum') }} - restore-keys: | - ${{ runner.os }}-go-${{ env.GO_VERSION }}- - - - name: (cache-miss) Bootstrap all project dependencies - if: steps.tool-cache.outputs.cache-hit != 'true' || steps.go-cache.outputs.cache-hit != 'true' - run: make bootstrap + - name: Bootstrap environment + uses: ./.github/actions/bootstrap - - name: Bootstrap CI environment dependencies - run: make ci-bootstrap - - - name: Run static analysis - run: make static-analysis + - name: Run static analysis + run: make static-analysis Unit-Test: name: "Unit tests" runs-on: ubuntu-20.04 steps: - - uses: actions/setup-go@v2 - with: - go-version: ${{ env.GO_VERSION }} - - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - name: Restore tool cache - id: tool-cache - uses: actions/cache@v2.1.3 - with: - path: ${{ github.workspace }}/.tmp - key: ${{ runner.os }}-tool-${{ hashFiles('Makefile') }} - - - name: Restore go cache - id: go-cache - uses: actions/cache@v2.1.3 - with: - path: ~/go/pkg/mod - key: ${{ runner.os }}-go-${{ env.GO_VERSION }}-${{ hashFiles('**/go.sum') }} - restore-keys: | - ${{ runner.os }}-go-${{ env.GO_VERSION }}- - - - name: (cache-miss) Bootstrap 
all project dependencies - if: steps.tool-cache.outputs.cache-hit != 'true' || steps.go-cache.outputs.cache-hit != 'true' - run: make bootstrap - - - name: Bootstrap CI environment dependencies - run: make ci-bootstrap + - name: Bootstrap environment + uses: ./.github/actions/bootstrap - name: Run unit tests run: make unit @@ -102,11 +51,10 @@ jobs: name: "Integration tests" runs-on: ubuntu-20.04 steps: - - uses: actions/setup-go@v2 - with: - go-version: ${{ env.GO_VERSION }} + - uses: actions/checkout@v3 - - uses: actions/checkout@v2 + - name: Bootstrap environment + uses: ./.github/actions/bootstrap - name: Enable systemd for podman socket activation run: | @@ -128,29 +76,6 @@ jobs: with: limit-access-to-actor: true - - name: Restore tool cache - id: tool-cache - uses: actions/cache@v2.1.3 - with: - path: ${{ github.workspace }}/.tmp - key: ${{ runner.os }}-tool-${{ hashFiles('Makefile') }} - - - name: Restore go cache - id: go-cache - uses: actions/cache@v2.1.3 - with: - path: ~/go/pkg/mod - key: ${{ runner.os }}-go-${{ env.GO_VERSION }}-${{ hashFiles('**/go.sum') }} - restore-keys: | - ${{ runner.os }}-go-${{ env.GO_VERSION }}- - - - name: (cache-miss) Bootstrap all project dependencies - if: steps.tool-cache.outputs.cache-hit != 'true' || steps.go-cache.outputs.cache-hit != 'true' - run: make bootstrap - - - name: Bootstrap CI environment dependencies - run: make ci-bootstrap - - name: Build key for test-fixture cache run: make integration-fingerprint @@ -177,77 +102,15 @@ jobs: - name: Run integration tests run: make integration - Benchmark-Test: - name: "Benchmark tests" + Build-Snapshot-Artifacts: + name: "Build snapshot artifacts" runs-on: ubuntu-20.04 - # note: we want benchmarks to run on pull_request events in order to publish results to a sticky comment, and - # we also want to run on push such that merges to main are recorded to the cache. For this reason we don't filter - # the job by event. 
steps: - - uses: actions/setup-go@v2 - with: - go-version: ${{ env.GO_VERSION }} - - - uses: actions/checkout@v2 - - - name: Restore tool cache - id: tool-cache - uses: actions/cache@v2.1.3 - with: - path: ${{ github.workspace }}/.tmp - key: ${{ runner.os }}-tool-${{ hashFiles('Makefile') }} - - - name: Restore go cache - id: go-cache - uses: actions/cache@v2.1.3 - with: - path: ~/go/pkg/mod - key: ${{ runner.os }}-go-${{ env.GO_VERSION }}-${{ hashFiles('**/go.sum') }} - restore-keys: | - ${{ runner.os }}-go-${{ env.GO_VERSION }}- - - - name: (cache-miss) Bootstrap all project dependencies - if: steps.tool-cache.outputs.cache-hit != 'true' || steps.go-cache.outputs.cache-hit != 'true' - run: make bootstrap - - - name: Bootstrap CI environment dependencies - run: make ci-bootstrap - - - name: Restore base benchmark result - uses: actions/cache@v2 - with: - path: test/results/benchmark-main.txt - # use base sha for PR or new commit hash for main push in benchmark result key - key: ${{ runner.os }}-bench-${{ (github.event.pull_request.base.sha != github.event.after) && github.event.pull_request.base.sha || github.event.after }} - - - name: Run benchmark tests - id: benchmark - run: | - REF_NAME=${GITHUB_REF##*/} make benchmark - OUTPUT=$(make show-benchstat) - OUTPUT="${OUTPUT//'%'/'%25'}" # URL encode all '%' characters - OUTPUT="${OUTPUT//$'\n'/'%0A'}" # URL encode all '\n' characters - OUTPUT="${OUTPUT//$'\r'/'%0D'}" # URL encode all '\r' characters - echo "::set-output name=result::$OUTPUT" - - - uses: actions/upload-artifact@v2 - with: - name: benchmark-test-results - path: test/results/**/* - - - name: Update PR benchmark results comment - uses: marocchino/sticky-pull-request-comment@v2 - continue-on-error: true - with: - header: benchmark - message: | - ### Benchmark Test Results + - uses: actions/checkout@v3 -
- Benchmark results from the latest changes vs base branch + - name: Bootstrap environment + uses: ./.github/actions/bootstrap - ``` - ${{ steps.benchmark.outputs.result }} - ``` + - name: Build snapshot artifacts + run: make snapshot -
diff --git a/.gitignore b/.gitignore index 930ca30b..26630caf 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ coverage.txt **/test-fixtures/cache/ **/*.fingerprint +snapshot/ # Binaries for programs and plugins *.exe diff --git a/Makefile b/Makefile index 181594bb..74de855c 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,15 @@ -TEMPDIR = ./.tmp -RESULTSDIR = test/results -COVER_REPORT = $(RESULTSDIR)/unit-coverage-details.txt -COVER_TOTAL = $(RESULTSDIR)/unit-coverage-summary.txt -LINTCMD = $(TEMPDIR)/golangci-lint run --tests=false --config .golangci.yaml +TEMP_DIR = ./.tmp + +# Command templates ################################# +LINT_CMD = $(TEMP_DIR)/golangci-lint run --tests=false --config .golangci.yaml + +# Tool versions ################################# +GOLANGCILINT_VERSION := v1.51.0 +GOSIMPORTS_VERSION := v0.3.5 +BOUNCER_VERSION := v0.4.0 +CHRONICLE_VERSION := v0.5.1 + +# Formatting variables ################################# BOLD := $(shell tput -T linux bold) PURPLE := $(shell tput -T linux setaf 5) GREEN := $(shell tput -T linux setaf 2) @@ -11,15 +18,16 @@ RED := $(shell tput -T linux setaf 1) RESET := $(shell tput -T linux sgr0) TITLE := $(BOLD)$(PURPLE) SUCCESS := $(BOLD)$(GREEN) -# the quality gate lower threshold for unit test total % coverage (by function statements) -COVERAGE_THRESHOLD := 48 + +# Test variables ################################# +COVERAGE_THRESHOLD := 55 # the quality gate lower threshold for unit test total % coverage (by function statements) ifeq "$(strip $(VERSION))" "" override VERSION = $(shell git describe --always --tags --dirty) endif -ifndef TEMPDIR - $(error TEMPDIR is not set) +ifndef TEMP_DIR + $(error TEMP_DIR is not set) endif ifndef REF_NAME @@ -31,37 +39,43 @@ define title endef .PHONY: all -all: static-analysis test ## Run all checks (linting, all tests, and dependencies license checks) +all: static-analysis test ## Run all linux-based checks (linting, license check, unit, integration, and linux compare 
tests) @printf '$(SUCCESS)All checks pass!$(RESET)\n' +.PHONY: static-analysis +static-analysis: check-go-mod-tidy lint check-licenses ## Run all static analysis checks + .PHONY: test -test: unit integration benchmark ## Run all levels of test +test: unit integration benchmark ## Run all tests (currently unit and integrations) -.PHONY: help -help: - @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "$(BOLD)$(CYAN)%-25s$(RESET)%s\n", $$1, $$2}' + +## Bootstrapping targets ################################# .PHONY: ci-bootstrap ci-bootstrap: bootstrap - sudo apt install -y bc curl -sLO https://github.com/sylabs/singularity/releases/download/v3.10.0/singularity-ce_3.10.0-focal_amd64.deb && sudo apt-get install -y -f ./singularity-ce_3.10.0-focal_amd64.deb -$(RESULTSDIR): - mkdir -p $(RESULTSDIR) - -.PHONY: boostrap -bootstrap: $(RESULTSDIR) ## Download and install all project dependencies (+ prep tooling in the ./tmp dir) - $(call title,Downloading dependencies) - @pwd - # prep temp dirs - mkdir -p $(TEMPDIR) - mkdir -p $(RESULTSDIR) - # install go dependencies +.PHONY: bootstrap +bootstrap: $(TEMP_DIR) bootstrap-go bootstrap-tools ## Download and install all tooling dependencies (+ prep tooling in the ./tmp dir) + $(call title,Bootstrapping dependencies) + +.PHONY: bootstrap-tools +bootstrap-tools: $(TEMP_DIR) + GO111MODULE=off GOBIN=$(realpath $(TEMP_DIR)) go get -u golang.org/x/perf/cmd/benchstat + curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(TEMP_DIR)/ $(GOLANGCILINT_VERSION) + curl -sSfL https://raw.githubusercontent.com/wagoodman/go-bouncer/master/bouncer.sh | sh -s -- -b $(TEMP_DIR)/ $(BOUNCER_VERSION) + curl -sSfL https://raw.githubusercontent.com/anchore/chronicle/main/install.sh | sh -s -- -b $(TEMP_DIR)/ $(CHRONICLE_VERSION) + # the only difference between goimports and gosimports is that gosimports removes extra whitespace between import blocks (see 
https://github.com/golang/go/issues/20818) + GOBIN="$(realpath $(TEMP_DIR))" go install github.com/rinchsan/gosimports/cmd/gosimports@$(GOSIMPORTS_VERSION) + +.PHONY: bootstrap-go +bootstrap-go: go mod download - # install utilities - [ -f "$(TEMPDIR)/benchstat" ] || GO111MODULE=off GOBIN=$(shell realpath $(TEMPDIR)) go get -u golang.org/x/perf/cmd/benchstat - [ -f "$(TEMPDIR)/golangci" ] || curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(TEMPDIR)/ v1.51.0 - [ -f "$(TEMPDIR)/bouncer" ] || curl -sSfL https://raw.githubusercontent.com/wagoodman/go-bouncer/master/bouncer.sh | sh -s -- -b $(TEMPDIR)/ v0.4.0 + +$(TEMP_DIR): + mkdir -p $(TEMP_DIR) + +## Static analysis targets ################################# .PHONY: static-analysis static-analysis: check-licenses lint @@ -71,40 +85,55 @@ lint: ## Run gofmt + golangci lint checks $(call title,Running linters) @printf "files with gofmt issues: [$(shell gofmt -l -s .)]\n" @test -z "$(shell gofmt -l -s .)" - $(LINTCMD) + $(LINT_CMD) .PHONY: lint-fix lint-fix: ## Auto-format all source code + run golangci lint fixers $(call title,Running lint fixers) gofmt -w -s . - $(LINTCMD) --fix + $(LINT_CMD) --fix go mod tidy .PHONY: check-licenses check-licenses: $(call title,Validating licenses for go dependencies) - $(TEMPDIR)/bouncer check + $(TEMP_DIR)/bouncer check + +check-go-mod-tidy: + @ .github/scripts/go-mod-tidy-check.sh && echo "go.mod and go.sum are tidy!" + +## Testing targets ################################# .PHONY: unit -unit: $(RESULTSDIR) ## Run unit tests (with coverage) +unit: $(TEMP_DIR) ## Run unit tests (with coverage) $(call title,Running unit tests) - go test --race -coverprofile $(COVER_REPORT) $(shell go list ./... 
| grep -v anchore/stereoscope/test/integration) - @go tool cover -func $(COVER_REPORT) | grep total | awk '{print substr($$3, 1, length($$3)-1)}' > $(COVER_TOTAL) - @echo "Coverage: $$(cat $(COVER_TOTAL))" - @if [ $$(echo "$$(cat $(COVER_TOTAL)) >= $(COVERAGE_THRESHOLD)" | bc -l) -ne 1 ]; then echo "$(RED)$(BOLD)Failed coverage quality gate (> $(COVERAGE_THRESHOLD)%)$(RESET)" && false; fi + go test -coverprofile $(TEMP_DIR)/unit-coverage-details.txt $(shell go list ./... | grep -v anchore/stereoscope/test) + @.github/scripts/coverage.py $(COVERAGE_THRESHOLD) $(TEMP_DIR)/unit-coverage-details.txt + + +.PHONY: integration +integration: integration-tools ## Run integration tests + $(call title,Running integration tests) + go test -v ./test/integration + +## Benchmark test targets ################################# + .PHONY: benchmark -benchmark: $(RESULTSDIR) ## Run benchmark tests and compare against the baseline (if available) +benchmark: $(TEMP_DIR) ## Run benchmark tests and compare against the baseline (if available) $(call title,Running benchmark tests) - go test -cpu 2 -p 1 -run=^Benchmark -bench=. -count=5 -benchmem ./... | tee $(RESULTSDIR)/benchmark-$(REF_NAME).txt - (test -s $(RESULTSDIR)/benchmark-main.txt && \ - $(TEMPDIR)/benchstat $(RESULTSDIR)/benchmark-main.txt $(RESULTSDIR)/benchmark-$(REF_NAME).txt || \ - $(TEMPDIR)/benchstat $(RESULTSDIR)/benchmark-$(REF_NAME).txt) \ - | tee $(RESULTSDIR)/benchstat.txt + go test -cpu 2 -p 1 -run=^Benchmark -bench=. -count=5 -benchmem ./... 
| tee $(TEMP_DIR)/benchmark-$(REF_NAME).txt + (test -s $(TEMP_DIR)/benchmark-main.txt && \ + $(TEMP_DIR)/benchstat $(TEMP_DIR)/benchmark-main.txt $(TEMP_DIR)/benchmark-$(REF_NAME).txt || \ + $(TEMP_DIR)/benchstat $(TEMP_DIR)/benchmark-$(REF_NAME).txt) \ + | tee $(TEMP_DIR)/benchstat.txt + .PHONY: show-benchstat show-benchstat: - @cat $(RESULTSDIR)/benchstat.txt + @cat $(TEMP_DIR)/benchstat.txt + +## Test-fixture-related targets ################################# # note: this is used by CI to determine if the integration test fixture cache (docker image tars) should be busted .PHONY: integration-fingerprint @@ -127,11 +156,22 @@ integration-tools-load: integration-tools-save: @cd test/integration/tools && make save-cache -.PHONY: integration -integration: integration-tools ## Run integration tests - $(call title,Running integration tests) - go test -v ./test/integration +## Build-related targets ################################# + +.PHONY: snapshot +snapshot: ## Build the binary + $(call title,Build compatability test) + @.github/scripts/build.sh + +## Cleanup targets ################################# .PHONY: clear-test-cache clear-test-cache: ## Delete all test cache (built docker image tars) find . -type f -wholename "**/test-fixtures/cache/*.tar" -delete + + +## Halp! 
################################# + +.PHONY: help +help: ## Display this help + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "$(BOLD)$(CYAN)%-25s$(RESET)%s\n", $$1, $$2}' diff --git a/pkg/file/metadata.go b/pkg/file/metadata.go index 5ba884af..4dab4021 100644 --- a/pkg/file/metadata.go +++ b/pkg/file/metadata.go @@ -2,12 +2,13 @@ package file import ( "archive/tar" - "github.com/anchore/stereoscope/internal/log" "io" "os" "path" "path/filepath" + "github.com/anchore/stereoscope/internal/log" + "github.com/sylabs/squashfs" ) diff --git a/pkg/file/type.go b/pkg/file/type.go index 0d587a1b..2555fd46 100644 --- a/pkg/file/type.go +++ b/pkg/file/type.go @@ -37,7 +37,7 @@ func AllTypes() []Type { func TypeFromTarType(ty byte) Type { switch ty { - case tar.TypeReg, tar.TypeRegA: + case tar.TypeReg, tar.TypeRegA: // nolint: staticcheck return TypeReg case tar.TypeLink: return TypeHardLink From b0fc17226ef57a9c585a3f1035091e970fe1b987 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Sun, 5 Feb 2023 10:08:56 -0500 Subject: [PATCH 26/35] fix snapshot builds Signed-off-by: Alex Goodman --- .github/scripts/build.sh | 12 +++++++----- Makefile | 38 +++++++++++++++++++++++++++----------- pkg/file/get_xid.go | 3 +-- pkg/file/get_xid_win.go | 1 - 4 files changed, 35 insertions(+), 19 deletions(-) diff --git a/.github/scripts/build.sh b/.github/scripts/build.sh index 18f13cb4..b911da8a 100755 --- a/.github/scripts/build.sh +++ b/.github/scripts/build.sh @@ -1,6 +1,8 @@ #!/usr/bin/env bash set -uo pipefail +SNAPSHOT_DIR=$1 + # Based on https://gist.github.com/eduncan911/68775dba9d3c028181e4 and https://gist.github.com/makeworld-the-better-one/e1bb127979ae4195f43aaa3ad46b1097 # but improved to use the `go` command so it never goes out of date. 
@@ -11,10 +13,10 @@ contains() { [[ $1 =~ (^|[[:space:]])$2($|[[:space:]]) ]] } -mkdir -p snapshot -rm -f snapshot/* +mkdir -p "${SNAPSHOT_DIR}" -OUTPUT=snapshot/stereoscope-example +BUILD_TARGET=./examples +OUTPUT=${SNAPSHOT_DIR}/stereoscope-example FAILURES="" # You can set your own flags on the command line @@ -60,14 +62,14 @@ while IFS= read -r target; do for GOARM in $arms; do BIN_FILENAME="${OUTPUT}-${GOOS}-${GOARCH}${GOARM}" if [[ "${GOOS}" == "windows" ]]; then BIN_FILENAME="${BIN_FILENAME}.exe"; fi - CMD="GOARM=${GOARM} GOOS=${GOOS} GOARCH=${GOARCH} go build $FLAGS -o ${BIN_FILENAME} $@" + CMD="GOARM=${GOARM} GOOS=${GOOS} GOARCH=${GOARCH} go build $FLAGS -o ${BIN_FILENAME} ${BUILD_TARGET}" echo "${CMD}" eval "${CMD}" || FAILURES="${FAILURES} ${GOOS}/${GOARCH}${GOARM}" done else # Build non-arm here if [[ "${GOOS}" == "windows" ]]; then BIN_FILENAME="${BIN_FILENAME}.exe"; fi - CMD="GOOS=${GOOS} GOARCH=${GOARCH} go build $FLAGS -o ${BIN_FILENAME} $@" + CMD="GOOS=${GOOS} GOARCH=${GOARCH} go build $FLAGS -o ${BIN_FILENAME} ${BUILD_TARGET}" echo "${CMD}" eval "${CMD}" || FAILURES="${FAILURES} ${GOOS}/${GOARCH}" fi diff --git a/Makefile b/Makefile index 74de855c..b5f3d3d6 100644 --- a/Makefile +++ b/Makefile @@ -22,22 +22,30 @@ SUCCESS := $(BOLD)$(GREEN) # Test variables ################################# COVERAGE_THRESHOLD := 55 # the quality gate lower threshold for unit test total % coverage (by function statements) -ifeq "$(strip $(VERSION))" "" - override VERSION = $(shell git describe --always --tags --dirty) +## Build variables ################################# +SNAPSHOT_DIR := ./snapshot +VERSION := $(shell git describe --dirty --always --tags) + +ifndef VERSION + $(error VERSION is not set) endif ifndef TEMP_DIR $(error TEMP_DIR is not set) endif -ifndef REF_NAME - REF_NAME = $(VERSION) -endif - define title @printf '$(TITLE)$(1)$(RESET)\n' endef +define safe_rm_rf + bash -c 'test -z "$(1)" && false || rm -rf $(1)' +endef + +define safe_rm_rf_children + 
bash -c 'test -z "$(1)" && false || rm -rf $(1)/*' +endef + .PHONY: all all: static-analysis test ## Run all linux-based checks (linting, license check, unit, integration, and linux compare tests) @printf '$(SUCCESS)All checks pass!$(RESET)\n' @@ -122,10 +130,10 @@ integration: integration-tools ## Run integration tests .PHONY: benchmark benchmark: $(TEMP_DIR) ## Run benchmark tests and compare against the baseline (if available) $(call title,Running benchmark tests) - go test -cpu 2 -p 1 -run=^Benchmark -bench=. -count=5 -benchmem ./... | tee $(TEMP_DIR)/benchmark-$(REF_NAME).txt + go test -cpu 2 -p 1 -run=^Benchmark -bench=. -count=5 -benchmem ./... | tee $(TEMP_DIR)/benchmark-$(VERSION).txt (test -s $(TEMP_DIR)/benchmark-main.txt && \ - $(TEMP_DIR)/benchstat $(TEMP_DIR)/benchmark-main.txt $(TEMP_DIR)/benchmark-$(REF_NAME).txt || \ - $(TEMP_DIR)/benchstat $(TEMP_DIR)/benchmark-$(REF_NAME).txt) \ + $(TEMP_DIR)/benchstat $(TEMP_DIR)/benchmark-main.txt $(TEMP_DIR)/benchmark-$(VERSION).txt || \ + $(TEMP_DIR)/benchstat $(TEMP_DIR)/benchmark-$(VERSION).txt) \ | tee $(TEMP_DIR)/benchstat.txt @@ -159,12 +167,20 @@ integration-tools-save: ## Build-related targets ################################# .PHONY: snapshot -snapshot: ## Build the binary +snapshot: clean-snapshot ## Build the binary $(call title,Build compatability test) - @.github/scripts/build.sh + @.github/scripts/build.sh $(SNAPSHOT_DIR) ## Cleanup targets ################################# +.PHONY: clean +clean: clear-test-cache clean-snapshot ## Delete all generated artifacts + $(call safe_rm_rf_children,$(TEMP_DIR)) + +.PHONY: clean-snapshot +clean-snapshot: ## Delete all snapshot builds + $(call safe_rm_rf,$(SNAPSHOT_DIR)) + .PHONY: clear-test-cache clear-test-cache: ## Delete all test cache (built docker image tars) find . 
-type f -wholename "**/test-fixtures/cache/*.tar" -delete diff --git a/pkg/file/get_xid.go b/pkg/file/get_xid.go index b59eb795..29a7b02a 100644 --- a/pkg/file/get_xid.go +++ b/pkg/file/get_xid.go @@ -1,5 +1,4 @@ -//go:build linux || darwin || netbsd -// +build linux darwin netbsd +//go:build !windows package file diff --git a/pkg/file/get_xid_win.go b/pkg/file/get_xid_win.go index 91083371..abe28de8 100644 --- a/pkg/file/get_xid_win.go +++ b/pkg/file/get_xid_win.go @@ -1,5 +1,4 @@ //go:build windows -// +build windows package file From 53dbcca224a2e4b9baf90cdb593f8f2ae986dbfa Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Mon, 6 Feb 2023 10:54:57 -0500 Subject: [PATCH 27/35] add tests for file.Index.GetByFileType Signed-off-by: Alex Goodman --- pkg/filetree/index_test.go | 176 +++++++++++++++++++++++++++++++++++++ pkg/filetree/search.go | 5 -- 2 files changed, 176 insertions(+), 5 deletions(-) diff --git a/pkg/filetree/index_test.go b/pkg/filetree/index_test.go index e436abb2..e1abed49 100644 --- a/pkg/filetree/index_test.go +++ b/pkg/filetree/index_test.go @@ -130,6 +130,182 @@ func Test_fileExtensions(t *testing.T) { } } +func TestFileCatalog_GetByFileType(t *testing.T) { + fileIndex := commonIndexFixture(t) + + tests := []struct { + name string + input []file.Type + want []IndexEntry + wantErr require.ErrorAssertionFunc + }{ + { + name: "get real file", + input: []file.Type{file.TypeReg}, + want: []IndexEntry{ + { + Reference: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-1.txt", + Type: file.TypeReg, + MIMEType: "text/plain", + }, + }, + { + Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.d"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-4.d", + Type: file.TypeReg, + MIMEType: "text/plain", + }, + }, + { + Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.tar.gz"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/file-4.tar.gz", + Type: 
file.TypeReg, + MIMEType: "text/plain", + }, + }, + { + Reference: file.Reference{RealPath: "/path/branch.d/one/.file-4.tar.gz"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one/.file-4.tar.gz", + Type: file.TypeReg, + MIMEType: "text/plain", + }, + }, + { + + Reference: file.Reference{RealPath: "/path/branch.d/two/file-2.txt"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/two/file-2.txt", + Type: file.TypeReg, + MIMEType: "text/plain", + }, + }, + { + Reference: file.Reference{RealPath: "/path/file-3.txt"}, + Metadata: file.Metadata{ + Path: "/path/file-3.txt", + Type: file.TypeReg, + MIMEType: "text/plain", + }, + }, + }, + }, + { + name: "get directories", + input: []file.Type{file.TypeDir}, + want: []IndexEntry{ + { + Reference: file.Reference{RealPath: "/path"}, + Metadata: file.Metadata{ + Path: "/path", + Type: file.TypeDir, + IsDir: true, + }, + }, + { + + Reference: file.Reference{RealPath: "/path/branch.d"}, + Metadata: file.Metadata{ + Path: "/path/branch.d", + Type: file.TypeDir, + IsDir: true, + }, + }, + { + Reference: file.Reference{RealPath: "/path/branch.d/one"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/one", + Type: file.TypeDir, + IsDir: true, + }, + }, + { + Reference: file.Reference{RealPath: "/path/branch.d/two"}, + Metadata: file.Metadata{ + Path: "/path/branch.d/two", + Type: file.TypeDir, + IsDir: true, + }, + }, + { + Reference: file.Reference{RealPath: "/path/common"}, + Metadata: file.Metadata{ + Path: "/path/common", + Type: file.TypeDir, + IsDir: true, + }, + }, + }, + }, + { + name: "get links", + input: []file.Type{file.TypeHardLink, file.TypeSymlink}, + want: []IndexEntry{ + { + Reference: file.Reference{RealPath: "/path/common/branch.d"}, + Metadata: file.Metadata{ + Path: "/path/common/branch.d", + LinkDestination: "path/branch.d", + Type: file.TypeSymlink, + }, + }, + { + Reference: file.Reference{RealPath: "/path/common/branch"}, + Metadata: file.Metadata{ + Path: "/path/common/branch", + LinkDestination: 
"path/branch.d", + Type: file.TypeSymlink, + }, + }, + { + Reference: file.Reference{RealPath: "/path/common/file-4"}, + Metadata: file.Metadata{ + Path: "/path/common/file-4", + LinkDestination: "path/branch.d/one/file-4.d", + Type: file.TypeSymlink, + }, + }, + { + Reference: file.Reference{RealPath: "/path/common/file-1.d"}, + Metadata: file.Metadata{ + Path: "/path/common/file-1.d", + LinkDestination: "path/branch.d/one/file-1.txt", + Type: file.TypeSymlink, + }, + }, + }, + }, + { + name: "get non-existent types", + input: []file.Type{file.TypeBlockDevice, file.TypeCharacterDevice, file.TypeFifo, file.TypeSocket, file.TypeIrregular}, + want: []IndexEntry{}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = require.NoError + } + actual, err := fileIndex.GetByFileType(tt.input...) + tt.wantErr(t, err) + if err != nil { + return + } + if d := cmp.Diff(tt.want, actual, + cmpopts.EquateEmpty(), + cmpopts.IgnoreUnexported(file.Reference{}), + cmpopts.IgnoreFields(file.Metadata{}, "Mode", "GroupID", "UserID", "Size"), + ); d != "" { + t.Errorf("diff: %s", d) + } + }) + } +} + func TestFileCatalog_GetByExtension(t *testing.T) { fileIndex := commonIndexFixture(t) diff --git a/pkg/filetree/search.go b/pkg/filetree/search.go index 3b48f15c..823df8f4 100644 --- a/pkg/filetree/search.go +++ b/pkg/filetree/search.go @@ -25,7 +25,6 @@ type searchContext struct { index IndexReader // this index is relative to one or more trees, not just necessarily one // the following enables correct link resolution when searching via the index - linkForwardRef map[node.ID]node.ID // {link-node-id: link-destination-node-id} linkBackwardRefs map[node.ID]node.IDSet // {link-destination-node-id: str([link-node-id, ...])} } @@ -33,7 +32,6 @@ func NewSearchContext(tree Reader, index IndexReader) Searcher { c := &searchContext{ tree: tree.(*FileTree), index: index, - linkForwardRef: make(map[node.ID]node.ID), linkBackwardRefs: 
make(map[node.ID]node.IDSet), } @@ -72,9 +70,6 @@ func (sc *searchContext) buildLinkResolutionIndex() error { linkID := fn.ID() destinationID := destinationFna.FileNode.ID() - // add forward reference... - sc.linkForwardRef[linkID] = destinationID - // add backward reference... if _, ok := sc.linkBackwardRefs[destinationID]; !ok { sc.linkBackwardRefs[destinationID] = node.NewIDSet() From c5bfa98786399356899e0c2fcfdc1981476df1b5 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Tue, 7 Feb 2023 11:04:34 -0500 Subject: [PATCH 28/35] rename file.Type and file.Resolution Signed-off-by: Alex Goodman --- pkg/file/metadata.go | 10 +- pkg/file/metadata_test.go | 22 +- pkg/file/reference_access_test.go | 131 --------- pkg/file/reference_test.go | 118 ++++---- .../{reference_access.go => resolution.go} | 57 ++-- pkg/file/resolution_test.go | 121 +++++++++ pkg/file/tarutil_test.go | 4 +- pkg/file/type.go | 40 +-- pkg/filetree/builder.go | 4 +- pkg/filetree/filenode/filenode.go | 8 +- pkg/filetree/filetree.go | 34 +-- pkg/filetree/filetree_test.go | 167 +++++------- pkg/filetree/glob.go | 4 +- pkg/filetree/glob_parser.go | 27 +- pkg/filetree/glob_parser_test.go | 14 +- pkg/filetree/glob_test.go | 20 +- pkg/filetree/index_test.go | 96 +++---- pkg/filetree/interfaces.go | 4 +- pkg/filetree/node_access.go | 6 +- pkg/filetree/search.go | 48 ++-- pkg/filetree/search_test.go | 251 ++++++++---------- pkg/image/file_catalog_test.go | 52 ++-- pkg/image/image.go | 4 +- 23 files changed, 582 insertions(+), 660 deletions(-) delete mode 100644 pkg/file/reference_access_test.go rename pkg/file/{reference_access.go => resolution.go} (65%) create mode 100644 pkg/file/resolution_test.go diff --git a/pkg/file/metadata.go b/pkg/file/metadata.go index 4dab4021..2a6358ff 100644 --- a/pkg/file/metadata.go +++ b/pkg/file/metadata.go @@ -52,15 +52,15 @@ func NewMetadataFromSquashFSFile(path string, f *squashfs.File) (Metadata, error var ty Type switch { case fi.IsDir(): - ty = TypeDir + ty = 
TypeDirectory case f.IsRegular(): - ty = TypeReg + ty = TypeRegular case f.IsSymlink(): - ty = TypeSymlink + ty = TypeSymLink default: switch fi.Mode() & os.ModeType { case os.ModeNamedPipe: - ty = TypeFifo + ty = TypeFIFO case os.ModeSocket: ty = TypeSocket case os.ModeDevice: @@ -95,7 +95,7 @@ func NewMetadataFromPath(path string, info os.FileInfo) Metadata { ty := TypeFromMode(info.Mode()) - if ty == TypeReg { + if ty == TypeRegular { f, err := os.Open(path) if err != nil { // TODO: it may be that the file is inaccessible, however, this is not an error or a warning. In the future we need to track these as known-unknowns diff --git a/pkg/file/metadata_test.go b/pkg/file/metadata_test.go index 4b69016d..aa60bc95 100644 --- a/pkg/file/metadata_test.go +++ b/pkg/file/metadata_test.go @@ -18,13 +18,13 @@ func TestFileMetadataFromTar(t *testing.T) { tarReader := getTarFixture(t, "fixture-1") expected := []Metadata{ - {Path: "/path", Type: TypeDir, LinkDestination: "", Size: 0, Mode: os.ModeDir | 0o755, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""}, - {Path: "/path/branch", Type: TypeDir, LinkDestination: "", Size: 0, Mode: os.ModeDir | 0o755, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""}, - {Path: "/path/branch/one", Type: TypeDir, LinkDestination: "", Size: 0, Mode: os.ModeDir | 0o700, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""}, - {Path: "/path/branch/one/file-1.txt", Type: TypeReg, LinkDestination: "", Size: 11, Mode: 0o700, UserID: 1337, GroupID: 5432, IsDir: false, MIMEType: "text/plain"}, - {Path: "/path/branch/two", Type: TypeDir, LinkDestination: "", Size: 0, Mode: os.ModeDir | 0o755, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""}, - {Path: "/path/branch/two/file-2.txt", Type: TypeReg, LinkDestination: "", Size: 12, Mode: 0o755, UserID: 1337, GroupID: 5432, IsDir: false, MIMEType: "text/plain"}, - {Path: "/path/file-3.txt", Type: TypeReg, LinkDestination: "", Size: 11, Mode: 0o664, UserID: 1337, GroupID: 5432, IsDir: 
false, MIMEType: "text/plain"}, + {Path: "/path", Type: TypeDirectory, LinkDestination: "", Size: 0, Mode: os.ModeDir | 0o755, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""}, + {Path: "/path/branch", Type: TypeDirectory, LinkDestination: "", Size: 0, Mode: os.ModeDir | 0o755, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""}, + {Path: "/path/branch/one", Type: TypeDirectory, LinkDestination: "", Size: 0, Mode: os.ModeDir | 0o700, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""}, + {Path: "/path/branch/one/file-1.txt", Type: TypeRegular, LinkDestination: "", Size: 11, Mode: 0o700, UserID: 1337, GroupID: 5432, IsDir: false, MIMEType: "text/plain"}, + {Path: "/path/branch/two", Type: TypeDirectory, LinkDestination: "", Size: 0, Mode: os.ModeDir | 0o755, UserID: 1337, GroupID: 5432, IsDir: true, MIMEType: ""}, + {Path: "/path/branch/two/file-2.txt", Type: TypeRegular, LinkDestination: "", Size: 12, Mode: 0o755, UserID: 1337, GroupID: 5432, IsDir: false, MIMEType: "text/plain"}, + {Path: "/path/file-3.txt", Type: TypeRegular, LinkDestination: "", Size: 11, Mode: 0o664, UserID: 1337, GroupID: 5432, IsDir: false, MIMEType: "text/plain"}, } var actual []Metadata @@ -55,22 +55,22 @@ func TestFileMetadataFromPath(t *testing.T) { }{ { path: "test-fixtures/symlinks-simple/readme", - expectedType: TypeReg, + expectedType: TypeRegular, expectedMIMEType: "text/plain", }, { path: "test-fixtures/symlinks-simple/link_to_new_readme", - expectedType: TypeSymlink, + expectedType: TypeSymLink, expectedMIMEType: "", }, { path: "test-fixtures/symlinks-simple/link_to_link_to_new_readme", - expectedType: TypeSymlink, + expectedType: TypeSymLink, expectedMIMEType: "", }, { path: "test-fixtures/symlinks-simple", - expectedType: TypeDir, + expectedType: TypeDirectory, expectedMIMEType: "", }, } diff --git a/pkg/file/reference_access_test.go b/pkg/file/reference_access_test.go deleted file mode 100644 index 34540b9f..00000000 --- a/pkg/file/reference_access_test.go +++ 
/dev/null @@ -1,131 +0,0 @@ -package file - -import ( - "github.com/stretchr/testify/assert" - "sort" - "testing" -) - -func TestReferenceAccessVias_Less(t *testing.T) { - - realA := ReferenceAccessVia{ - ReferenceAccess: ReferenceAccess{ - RequestPath: "/parent/a", - Reference: &Reference{ - RealPath: "/parent/a", - }, - }, - } - - realB := ReferenceAccessVia{ - ReferenceAccess: ReferenceAccess{ - RequestPath: "/parent/b", - Reference: &Reference{ - RealPath: "/parent/b", - }, - }, - } - - linkToA := ReferenceAccessVia{ - ReferenceAccess: ReferenceAccess{ - RequestPath: "/parent-link/a", - Reference: &Reference{ - RealPath: "/a", - }, - }, - } - - linkToB := ReferenceAccessVia{ - ReferenceAccess: ReferenceAccess{ - RequestPath: "/parent-link/b", - Reference: &Reference{ - RealPath: "/b", - }, - }, - } - - tests := []struct { - name string - subject []ReferenceAccessVia - want []ReferenceAccessVia - }{ - { - name: "references to real files are preferred first", - subject: []ReferenceAccessVia{ - linkToA, - realA, - }, - want: []ReferenceAccessVia{ - realA, - linkToA, - }, - }, - { - name: "real files are treated equally by request name", - subject: []ReferenceAccessVia{ - realB, - realA, - }, - want: []ReferenceAccessVia{ - realA, - realB, - }, - }, - { - name: "link files are treated equally by request name", - subject: []ReferenceAccessVia{ - linkToB, - linkToA, - }, - want: []ReferenceAccessVia{ - linkToA, - linkToB, - }, - }, - { - name: "regression", - subject: []ReferenceAccessVia{ - { - ReferenceAccess: ReferenceAccess{ - RequestPath: "/parent-link/file-4.txt", - Reference: &Reference{ - RealPath: "/parent/file-4.txt", - }, - }, - }, - { - ReferenceAccess: ReferenceAccess{ - RequestPath: "/parent/file-4.txt", - Reference: &Reference{ - RealPath: "/parent/file-4.txt", - }, - }, - }, - }, - want: []ReferenceAccessVia{ - { - ReferenceAccess: ReferenceAccess{ - RequestPath: "/parent/file-4.txt", - Reference: &Reference{ - RealPath: "/parent/file-4.txt", - }, - 
}, - }, - { - ReferenceAccess: ReferenceAccess{ - RequestPath: "/parent-link/file-4.txt", - Reference: &Reference{ - RealPath: "/parent/file-4.txt", - }, - }, - }, - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - sort.Sort(ReferenceAccessVias(tt.subject)) - assert.Equal(t, tt.want, tt.subject) - }) - } -} diff --git a/pkg/file/reference_test.go b/pkg/file/reference_test.go index 1d9b000b..9782f67d 100644 --- a/pkg/file/reference_test.go +++ b/pkg/file/reference_test.go @@ -5,31 +5,28 @@ import ( "testing" ) -func TestReferenceAccessVia_RequestPaths(t *testing.T) { +func TestResolution_RequestResolutionPath(t *testing.T) { tests := []struct { name string - subject ReferenceAccessVia + subject Resolution want []Path }{ { name: "empty", - subject: ReferenceAccessVia{ - ReferenceAccess: ReferenceAccess{}, - LeafLinkResolution: nil, + subject: Resolution{ + LinkResolutions: nil, }, want: nil, }, { name: "single ref", - subject: ReferenceAccessVia{ - ReferenceAccess: ReferenceAccess{ - RequestPath: "/home/wagoodman/file.txt", - Reference: &Reference{ - id: 1, - RealPath: "/home/wagoodman/file.txt", - }, + subject: Resolution{ + RequestPath: "/home/wagoodman/file.txt", + Reference: &Reference{ + id: 1, + RealPath: "/home/wagoodman/file.txt", }, - LeafLinkResolution: nil, + LinkResolutions: nil, }, want: []Path{ "/home/wagoodman/file.txt", @@ -38,12 +35,10 @@ func TestReferenceAccessVia_RequestPaths(t *testing.T) { { // /home -> /another/place name: "ref with 1 leaf link resolutions", - subject: ReferenceAccessVia{ - ReferenceAccess: ReferenceAccess{ - RequestPath: "/home", - Reference: &Reference{RealPath: "/another/place"}, - }, - LeafLinkResolution: []ReferenceAccess{ + subject: Resolution{ + RequestPath: "/home", + Reference: &Reference{RealPath: "/another/place"}, + LinkResolutions: []Resolution{ { RequestPath: "/home", Reference: &Reference{RealPath: "/home"}, @@ -72,12 +67,10 @@ func TestReferenceAccessVia_RequestPaths(t *testing.T) 
{ // └── file.txt -> link-to-1/file.txt name: "ref with 2 leaf link resolutions", - subject: ReferenceAccessVia{ - ReferenceAccess: ReferenceAccess{ - RequestPath: "/home/wagoodman/file.txt", - Reference: &Reference{RealPath: "/2/real-file.txt"}, - }, - LeafLinkResolution: []ReferenceAccess{ + subject: Resolution{ + RequestPath: "/home/wagoodman/file.txt", + Reference: &Reference{RealPath: "/2/real-file.txt"}, + LinkResolutions: []Resolution{ { RequestPath: "/place/wagoodman/file.txt", Reference: &Reference{RealPath: "/place/wagoodman/file.txt"}, @@ -110,14 +103,12 @@ func TestReferenceAccessVia_RequestPaths(t *testing.T) { // └── file.txt -> link-to-1/file.txt name: "ref with dead link", - subject: ReferenceAccessVia{ - ReferenceAccess: ReferenceAccess{ - RequestPath: "/home/wagoodman/file.txt", - // note: this falls back to the last path that exists which is the behavior for link resolution options: - // []LinkResolutionOption{FollowBasenameLinks, DoNotFollowDeadBasenameLinks} - Reference: &Reference{RealPath: "/1/file.txt"}, - }, - LeafLinkResolution: []ReferenceAccess{ + subject: Resolution{ + RequestPath: "/home/wagoodman/file.txt", + // note: this falls back to the last path that exists which is the behavior for link resolution options: + // []LinkResolutionOption{FollowBasenameLinks, DoNotFollowDeadBasenameLinks} + Reference: &Reference{RealPath: "/1/file.txt"}, + LinkResolutions: []Resolution{ { RequestPath: "/place/wagoodman/file.txt", Reference: &Reference{RealPath: "/place/wagoodman/file.txt"}, @@ -148,35 +139,32 @@ func TestReferenceAccessVia_RequestPaths(t *testing.T) { } } -func TestReferenceAccessVia_AccessReferences(t *testing.T) { +func TestReferenceResolutionVia_ResolutionReferences(t *testing.T) { type fields struct { - ReferenceAccess ReferenceAccess - LeafLinkResolution []ReferenceAccess + ReferenceResolution Resolution + LeafLinkResolution []Resolution } tests := []struct { name string - subject ReferenceAccessVia + subject Resolution want 
[]Reference }{ { name: "empty", - subject: ReferenceAccessVia{ - ReferenceAccess: ReferenceAccess{}, - LeafLinkResolution: nil, + subject: Resolution{ + LinkResolutions: nil, }, want: nil, }, { name: "single ref", - subject: ReferenceAccessVia{ - ReferenceAccess: ReferenceAccess{ - RequestPath: "/home/wagoodman/file.txt", - Reference: &Reference{ - id: 1, - RealPath: "/home/wagoodman/file.txt", - }, + subject: Resolution{ + RequestPath: "/home/wagoodman/file.txt", + Reference: &Reference{ + id: 1, + RealPath: "/home/wagoodman/file.txt", }, - LeafLinkResolution: nil, + LinkResolutions: nil, }, want: []Reference{ { @@ -188,12 +176,10 @@ func TestReferenceAccessVia_AccessReferences(t *testing.T) { { // /home -> /another/place name: "ref with 1 leaf link resolutions", - subject: ReferenceAccessVia{ - ReferenceAccess: ReferenceAccess{ - RequestPath: "/home", - Reference: &Reference{RealPath: "/another/place"}, - }, - LeafLinkResolution: []ReferenceAccess{ + subject: Resolution{ + RequestPath: "/home", + Reference: &Reference{RealPath: "/another/place"}, + LinkResolutions: []Resolution{ { RequestPath: "/home", Reference: &Reference{RealPath: "/home"}, @@ -222,12 +208,10 @@ func TestReferenceAccessVia_AccessReferences(t *testing.T) { // └── file.txt -> link-to-1/file.txt name: "ref with 2 leaf link resolutions", - subject: ReferenceAccessVia{ - ReferenceAccess: ReferenceAccess{ - RequestPath: "/home/wagoodman/file.txt", - Reference: &Reference{RealPath: "/2/real-file.txt"}, - }, - LeafLinkResolution: []ReferenceAccess{ + subject: Resolution{ + RequestPath: "/home/wagoodman/file.txt", + Reference: &Reference{RealPath: "/2/real-file.txt"}, + LinkResolutions: []Resolution{ { RequestPath: "/place/wagoodman/file.txt", Reference: &Reference{RealPath: "/place/wagoodman/file.txt"}, @@ -259,14 +243,12 @@ func TestReferenceAccessVia_AccessReferences(t *testing.T) { // └── file.txt -> link-to-1/file.txt name: "ref with dead link", - subject: ReferenceAccessVia{ - ReferenceAccess: 
ReferenceAccess{ - RequestPath: "/home/wagoodman/file.txt", - // note: this falls back to the last path that exists which is the behavior for link resolution options: - // []LinkResolutionOption{FollowBasenameLinks, DoNotFollowDeadBasenameLinks} - Reference: &Reference{RealPath: "/1/file.txt"}, - }, - LeafLinkResolution: []ReferenceAccess{ + subject: Resolution{ + RequestPath: "/home/wagoodman/file.txt", + // note: this falls back to the last path that exists which is the behavior for link resolution options: + // []LinkResolutionOption{FollowBasenameLinks, DoNotFollowDeadBasenameLinks} + Reference: &Reference{RealPath: "/1/file.txt"}, + LinkResolutions: []Resolution{ { RequestPath: "/place/wagoodman/file.txt", Reference: &Reference{RealPath: "/place/wagoodman/file.txt"}, diff --git a/pkg/file/reference_access.go b/pkg/file/resolution.go similarity index 65% rename from pkg/file/reference_access.go rename to pkg/file/resolution.go index a83b97dd..738ebd49 100644 --- a/pkg/file/reference_access.go +++ b/pkg/file/resolution.go @@ -6,37 +6,32 @@ import ( "github.com/scylladb/go-set/strset" ) -// ReferenceAccess represents the fetching of a possibly non-existent file, and how it was accessed. -type ReferenceAccess struct { +// Resolution represents the fetching of a possibly non-existent file via a request path. +type Resolution struct { RequestPath Path *Reference + // LinkResolutions represents the traversal through the filesystem to access to current reference, including all symlink and hardlink resolution. + // note: today this only shows resolutions via the basename of the request path, but in the future it may show all resolutions. + LinkResolutions []Resolution } -// ReferenceAccessVia represents a possibly non-existent file, and how it was accessed, including all symlink and hardlink resolution. 
-type ReferenceAccessVia struct { - ReferenceAccess - LeafLinkResolution []ReferenceAccess -} - -type ReferenceAccessVias []ReferenceAccessVia +type Resolutions []Resolution -// NewFileReferenceVia create a new ReferenceAccessVia for the given request path, showing the resolved reference (or +// NewResolution create a new Resolution for the given request path, showing the resolved reference (or // nil if it does not exist), and the link resolution of the basename of the request path transitively. -func NewFileReferenceVia(path Path, ref *Reference, leafs []ReferenceAccess) *ReferenceAccessVia { - return &ReferenceAccessVia{ - ReferenceAccess: ReferenceAccess{ - RequestPath: path, - Reference: ref, - }, - LeafLinkResolution: leafs, +func NewResolution(path Path, ref *Reference, leafs []Resolution) *Resolution { + return &Resolution{ + RequestPath: path, + Reference: ref, + LinkResolutions: leafs, } } -func (f ReferenceAccessVias) Len() int { +func (f Resolutions) Len() int { return len(f) } -func (f ReferenceAccessVias) Less(i, j int) bool { +func (f Resolutions) Less(i, j int) bool { ith := f[i] jth := f[j] @@ -53,24 +48,24 @@ func (f ReferenceAccessVias) Less(i, j int) bool { return ith.RequestPath < jth.RequestPath } -func (f ReferenceAccessVias) Swap(i, j int) { +func (f Resolutions) Swap(i, j int) { f[i], f[j] = f[j], f[i] } -func (f *ReferenceAccessVia) HasReference() bool { +func (f *Resolution) HasReference() bool { if f == nil { return false } return f.Reference != nil } -func (f *ReferenceAccessVia) AllPaths() []Path { +func (f *Resolution) AllPaths() []Path { set := strset.New() set.Add(string(f.RequestPath)) if f.Reference != nil { set.Add(string(f.Reference.RealPath)) } - for _, p := range f.LeafLinkResolution { + for _, p := range f.LinkResolutions { set.Add(string(p.RequestPath)) if p.Reference != nil { set.Add(string(p.Reference.RealPath)) @@ -87,10 +82,10 @@ func (f *ReferenceAccessVia) AllPaths() []Path { return results } -func (f 
*ReferenceAccessVia) AllRequestPaths() []Path { +func (f *Resolution) AllRequestPaths() []Path { set := strset.New() set.Add(string(f.RequestPath)) - for _, p := range f.LeafLinkResolution { + for _, p := range f.LinkResolutions { set.Add(string(p.RequestPath)) } @@ -105,7 +100,7 @@ func (f *ReferenceAccessVia) AllRequestPaths() []Path { } // RequestResolutionPath represents the traversal through the filesystem to access to current reference, including all symlink and hardlink resolution. -func (f *ReferenceAccessVia) RequestResolutionPath() []Path { +func (f *Resolution) RequestResolutionPath() []Path { var paths []Path var firstPath Path var lastLinkResolutionIsDead bool @@ -114,7 +109,7 @@ func (f *ReferenceAccessVia) RequestResolutionPath() []Path { firstPath = f.RequestPath paths = append(paths, f.RequestPath) } - for i, p := range f.LeafLinkResolution { + for i, p := range f.LinkResolutions { if i == 0 && p.RequestPath == f.RequestPath { // ignore link resolution that starts with the same user requested path continue @@ -125,7 +120,7 @@ func (f *ReferenceAccessVia) RequestResolutionPath() []Path { paths = append(paths, p.RequestPath) - if i == len(f.LeafLinkResolution)-1 { + if i == len(f.LinkResolutions)-1 { // we've reached the final link resolution if p.Reference == nil { lastLinkResolutionIsDead = true @@ -141,15 +136,15 @@ func (f *ReferenceAccessVia) RequestResolutionPath() []Path { } // ResolutionReferences represents the traversal through the filesystem to access to current reference, including all symlink and hardlink resolution. 
-func (f *ReferenceAccessVia) ResolutionReferences() []Reference { +func (f *Resolution) ResolutionReferences() []Reference { var refs []Reference var lastLinkResolutionIsDead bool - for i, p := range f.LeafLinkResolution { + for i, p := range f.LinkResolutions { if p.Reference != nil { refs = append(refs, *p.Reference) } - if i == len(f.LeafLinkResolution)-1 { + if i == len(f.LinkResolutions)-1 { // we've reached the final link resolution if p.Reference == nil { lastLinkResolutionIsDead = true diff --git a/pkg/file/resolution_test.go b/pkg/file/resolution_test.go new file mode 100644 index 00000000..45a7e994 --- /dev/null +++ b/pkg/file/resolution_test.go @@ -0,0 +1,121 @@ +package file + +import ( + "github.com/stretchr/testify/assert" + "sort" + "testing" +) + +func TestReferenceResolutionVias_Less(t *testing.T) { + + realA := Resolution{ + + RequestPath: "/parent/a", + Reference: &Reference{ + RealPath: "/parent/a", + }, + } + + realB := Resolution{ + + RequestPath: "/parent/b", + Reference: &Reference{ + RealPath: "/parent/b", + }, + } + + linkToA := Resolution{ + + RequestPath: "/parent-link/a", + Reference: &Reference{ + RealPath: "/a", + }, + } + + linkToB := Resolution{ + RequestPath: "/parent-link/b", + Reference: &Reference{ + RealPath: "/b", + }, + } + + tests := []struct { + name string + subject []Resolution + want []Resolution + }{ + { + name: "references to real files are preferred first", + subject: []Resolution{ + linkToA, + realA, + }, + want: []Resolution{ + realA, + linkToA, + }, + }, + { + name: "real files are treated equally by request name", + subject: []Resolution{ + realB, + realA, + }, + want: []Resolution{ + realA, + realB, + }, + }, + { + name: "link files are treated equally by request name", + subject: []Resolution{ + linkToB, + linkToA, + }, + want: []Resolution{ + linkToA, + linkToB, + }, + }, + { + name: "regression", + subject: []Resolution{ + { + + RequestPath: "/parent-link/file-4.txt", + Reference: &Reference{ + RealPath: 
"/parent/file-4.txt", + }, + }, + { + + RequestPath: "/parent/file-4.txt", + Reference: &Reference{ + RealPath: "/parent/file-4.txt", + }, + }, + }, + want: []Resolution{ + { + RequestPath: "/parent/file-4.txt", + Reference: &Reference{ + RealPath: "/parent/file-4.txt", + }, + }, + { + + RequestPath: "/parent-link/file-4.txt", + Reference: &Reference{ + RealPath: "/parent/file-4.txt", + }, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + sort.Sort(Resolutions(tt.subject)) + assert.Equal(t, tt.want, tt.subject) + }) + } +} diff --git a/pkg/file/tarutil_test.go b/pkg/file/tarutil_test.go index 5d8fcb18..2103f242 100644 --- a/pkg/file/tarutil_test.go +++ b/pkg/file/tarutil_test.go @@ -67,7 +67,7 @@ func TestMetadataFromTar(t *testing.T) { Size: 12, UserID: 1337, GroupID: 5432, - Type: TypeReg, + Type: TypeRegular, IsDir: false, Mode: 0x1ed, MIMEType: "application/octet-stream", @@ -82,7 +82,7 @@ func TestMetadataFromTar(t *testing.T) { Size: 0, UserID: 1337, GroupID: 5432, - Type: TypeDir, + Type: TypeDirectory, IsDir: true, Mode: 0x800001ed, MIMEType: "", diff --git a/pkg/file/type.go b/pkg/file/type.go index 2555fd46..67562ae9 100644 --- a/pkg/file/type.go +++ b/pkg/file/type.go @@ -6,13 +6,13 @@ import ( ) const ( - TypeReg Type = iota + TypeRegular Type = iota TypeHardLink - TypeSymlink + TypeSymLink TypeCharacterDevice TypeBlockDevice - TypeDir - TypeFifo + TypeDirectory + TypeFIFO TypeSocket TypeIrregular ) @@ -23,13 +23,13 @@ type Type int func AllTypes() []Type { return []Type{ - TypeReg, + TypeRegular, TypeHardLink, - TypeSymlink, + TypeSymLink, TypeCharacterDevice, TypeBlockDevice, - TypeDir, - TypeFifo, + TypeDirectory, + TypeFIFO, TypeSocket, TypeIrregular, } @@ -38,19 +38,19 @@ func AllTypes() []Type { func TypeFromTarType(ty byte) Type { switch ty { case tar.TypeReg, tar.TypeRegA: // nolint: staticcheck - return TypeReg + return TypeRegular case tar.TypeLink: return TypeHardLink case tar.TypeSymlink: - return 
TypeSymlink + return TypeSymLink case tar.TypeChar: return TypeCharacterDevice case tar.TypeBlock: return TypeBlockDevice case tar.TypeDir: - return TypeDir + return TypeDirectory case tar.TypeFifo: - return TypeFifo + return TypeFIFO default: return TypeIrregular } @@ -59,7 +59,7 @@ func TypeFromTarType(ty byte) Type { func TypeFromMode(mode os.FileMode) Type { switch { case isSet(mode, os.ModeSymlink): - return TypeSymlink + return TypeSymLink case isSet(mode, os.ModeIrregular): return TypeIrregular case isSet(mode, os.ModeCharDevice): @@ -67,13 +67,13 @@ func TypeFromMode(mode os.FileMode) Type { case isSet(mode, os.ModeDevice): return TypeBlockDevice case isSet(mode, os.ModeNamedPipe): - return TypeFifo + return TypeFIFO case isSet(mode, os.ModeSocket): return TypeSocket case mode.IsDir(): - return TypeDir + return TypeDirectory case mode.IsRegular(): - return TypeReg + return TypeRegular default: return TypeIrregular } @@ -85,19 +85,19 @@ func isSet(mode, field os.FileMode) bool { func (t Type) String() string { switch t { - case TypeReg: + case TypeRegular: return "RegularFile" case TypeHardLink: return "HardLink" - case TypeSymlink: + case TypeSymLink: return "SymbolicLink" case TypeCharacterDevice: return "CharacterDevice" case TypeBlockDevice: return "BlockDevice" - case TypeDir: + case TypeDirectory: return "Directory" - case TypeFifo: + case TypeFIFO: return "FIFONode" case TypeSocket: return "Socket" diff --git a/pkg/filetree/builder.go b/pkg/filetree/builder.go index c22b3b27..0cfbb86f 100644 --- a/pkg/filetree/builder.go +++ b/pkg/filetree/builder.go @@ -23,7 +23,7 @@ func (b *Builder) Add(metadata file.Metadata) (*file.Reference, error) { var ref *file.Reference var err error switch metadata.Type { - case file.TypeSymlink: + case file.TypeSymLink: ref, err = b.tree.AddSymLink(file.Path(metadata.Path), file.Path(metadata.LinkDestination)) if err != nil { return nil, err @@ -33,7 +33,7 @@ func (b *Builder) Add(metadata file.Metadata) (*file.Reference, 
error) { if err != nil { return nil, err } - case file.TypeDir: + case file.TypeDirectory: ref, err = b.tree.AddDir(file.Path(metadata.Path)) if err != nil { return nil, err diff --git a/pkg/filetree/filenode/filenode.go b/pkg/filetree/filenode/filenode.go index ba2dcb3c..14f0fe7a 100644 --- a/pkg/filetree/filenode/filenode.go +++ b/pkg/filetree/filenode/filenode.go @@ -18,7 +18,7 @@ type FileNode struct { func NewDir(p file.Path, ref *file.Reference) *FileNode { return &FileNode{ RealPath: p, - FileType: file.TypeDir, + FileType: file.TypeDirectory, Reference: ref, } } @@ -26,7 +26,7 @@ func NewDir(p file.Path, ref *file.Reference) *FileNode { func NewFile(p file.Path, ref *file.Reference) *FileNode { return &FileNode{ RealPath: p, - FileType: file.TypeReg, + FileType: file.TypeRegular, Reference: ref, } } @@ -34,7 +34,7 @@ func NewFile(p file.Path, ref *file.Reference) *FileNode { func NewSymLink(p, linkPath file.Path, ref *file.Reference) *FileNode { return &FileNode{ RealPath: p, - FileType: file.TypeSymlink, + FileType: file.TypeSymLink, LinkPath: linkPath, Reference: ref, } @@ -65,7 +65,7 @@ func (n *FileNode) Copy() node.Node { } func (n *FileNode) IsLink() bool { - return n.FileType == file.TypeHardLink || n.FileType == file.TypeSymlink + return n.FileType == file.TypeHardLink || n.FileType == file.TypeSymLink } func IDByPath(p file.Path) node.ID { diff --git a/pkg/filetree/filetree.go b/pkg/filetree/filetree.go index 3045f2b2..8c5e91cd 100644 --- a/pkg/filetree/filetree.go +++ b/pkg/filetree/filetree.go @@ -52,7 +52,7 @@ func (t *FileTree) Copy() (ReadWriter, error) { // AllFiles returns all files within the FileTree (defaults to regular files only, but you can provide one or more allow types). 
func (t *FileTree) AllFiles(types ...file.Type) []file.Reference { if len(types) == 0 { - types = []file.Type{file.TypeReg} + types = []file.Type{file.TypeRegular} } typeSet := iset.New() @@ -94,7 +94,7 @@ func (t *FileTree) ListPaths(dir file.Path) ([]file.Path, error) { return nil, nil } - if fna.FileNode.FileType != file.TypeDir { + if fna.FileNode.FileType != file.TypeDirectory { return nil, nil } @@ -119,13 +119,13 @@ func (t *FileTree) ListPaths(dir file.Path) ([]file.Path, error) { } // File fetches a file.Reference for the given path. Returns nil if the path does not exist in the FileTree. -func (t *FileTree) File(path file.Path, options ...LinkResolutionOption) (bool, *file.ReferenceAccessVia, error) { +func (t *FileTree) File(path file.Path, options ...LinkResolutionOption) (bool, *file.Resolution, error) { currentNode, err := t.file(path, options...) if err != nil { return false, nil, err } if currentNode.HasFileNode() { - return true, currentNode.FileReferenceVia(), err + return true, currentNode.FileResolution(), err } return false, nil, err } @@ -172,14 +172,14 @@ func (t *FileTree) file(path file.Path, options ...LinkResolutionOption) (*nodeA return nil, err } -func newReferenceAccessPath(nodePath []nodeAccess) []file.ReferenceAccess { - var refPath []file.ReferenceAccess +func newResolutions(nodePath []nodeAccess) []file.Resolution { + var refPath []file.Resolution for i, n := range nodePath { if i == len(nodePath)-1 && n.FileNode != nil { - // this is already on the parent ReferenceAccessVia object (unless it is a dead link) + // this is already on the parent Access object (unless it is a dead link) break } - access := file.ReferenceAccess{ + access := file.Resolution{ RequestPath: n.RequestPath, } if n.FileNode != nil { @@ -410,8 +410,8 @@ func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, } // FilesByGlob fetches zero to many file.References for the given glob pattern (considers symlinks). 
-func (t *FileTree) FilesByGlob(query string, options ...LinkResolutionOption) ([]file.ReferenceAccessVia, error) { - var results []file.ReferenceAccessVia +func (t *FileTree) FilesByGlob(query string, options ...LinkResolutionOption) ([]file.Resolution, error) { + var results []file.Resolution if len(query) == 0 { return nil, fmt.Errorf("no glob pattern given") @@ -453,11 +453,11 @@ func (t *FileTree) FilesByGlob(query string, options ...LinkResolutionOption) ([ return nil, err } // the Node must exist and should not be a directory - if fna.HasFileNode() && fna.FileNode.FileType != file.TypeDir { - result := file.NewFileReferenceVia( + if fna.HasFileNode() && fna.FileNode.FileType != file.TypeDirectory { + result := file.NewResolution( matchPath, fna.FileNode.Reference, - newReferenceAccessPath(fna.LeafLinkResolution), + newResolutions(fna.LeafLinkResolution), ) if result != nil { results = append(results, *result) @@ -479,7 +479,7 @@ func (t *FileTree) AddFile(realPath file.Path) (*file.Reference, error) { } if fna.HasFileNode() { // this path already exists - if fna.FileNode.FileType != file.TypeReg { + if fna.FileNode.FileType != file.TypeRegular { return nil, fmt.Errorf("path=%q already exists but is NOT a regular file", realPath) } // this is a regular file, provide a new or existing file.Reference @@ -507,7 +507,7 @@ func (t *FileTree) AddSymLink(realPath file.Path, linkPath file.Path) (*file.Ref } if fna.HasFileNode() { // this path already exists - if fna.FileNode.FileType != file.TypeSymlink { + if fna.FileNode.FileType != file.TypeSymLink { return nil, fmt.Errorf("path=%q already exists but is NOT a symlink file", realPath) } // this is a symlink file, provide a new or existing file.Reference @@ -566,7 +566,7 @@ func (t *FileTree) AddDir(realPath file.Path) (*file.Reference, error) { } if fna.HasFileNode() { // this path already exists - if fna.FileNode.FileType != file.TypeDir { + if fna.FileNode.FileType != file.TypeDirectory { return nil, 
fmt.Errorf("path=%q already exists but is NOT a symlink file", realPath) } // this is a directory, provide a new or existing file.Reference @@ -826,7 +826,7 @@ func (t *FileTree) Merge(upper Reader) error { nodeCopy.Reference = lowerNode.FileNode.Reference } - if lowerNode.HasFileNode() && upperNode.FileType != file.TypeDir && lowerNode.FileNode.FileType == file.TypeDir { + if lowerNode.HasFileNode() && upperNode.FileType != file.TypeDirectory && lowerNode.FileNode.FileType == file.TypeDirectory { // NOTE: both upperNode and lowerNode paths are the same, and does not have an effect // on removal of child paths err := t.RemoveChildPaths(upperNode.RealPath) diff --git a/pkg/filetree/filetree_test.go b/pkg/filetree/filetree_test.go index 4dc9d23e..44effa0e 100644 --- a/pkg/filetree/filetree_test.go +++ b/pkg/filetree/filetree_test.go @@ -2,10 +2,11 @@ package filetree import ( "errors" + "testing" + "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" "github.com/stretchr/testify/require" - "testing" "github.com/anchore/stereoscope/internal" "github.com/anchore/stereoscope/pkg/file" @@ -99,13 +100,11 @@ func TestFileTree_FilesByGlob_AncestorSymlink(t *testing.T) { expectedRef, err := tr.AddFile("/parent/file.txt") require.NoError(t, err) - expected := []file.ReferenceAccessVia{ + expected := []file.Resolution{ { - ReferenceAccess: file.ReferenceAccess{ - RequestPath: "/parent-link/file.txt", - Reference: expectedRef, - }, - LeafLinkResolution: nil, + RequestPath: "/parent-link/file.txt", + Reference: expectedRef, + LinkResolutions: nil, }, } @@ -492,7 +491,7 @@ func TestFileTree_Merge_DirOverride(t *testing.T) { t.Fatalf("somehow override path does not exist?") } - if n.FileNode.FileType != file.TypeDir { + if n.FileNode.FileType != file.TypeDirectory { t.Errorf("did not override to dir") } @@ -531,7 +530,7 @@ func TestFileTree_Merge_RemoveChildPathsOnOverride(t *testing.T) { t.Fatalf("somehow override path does not exist?") } - if 
fileNode.FileNode.FileType != file.TypeReg { + if fileNode.FileNode.FileType != file.TypeRegular { t.Errorf("did not override to dir") } @@ -590,12 +589,10 @@ func TestFileTree_File_MultiSymlink(t *testing.T) { // - place/wagoodman // - place/wagoodman/file.txt -> link-to-1/file.txt -> 1/file.txt -> 2/real-file.txt - expected := &file.ReferenceAccessVia{ - ReferenceAccess: file.ReferenceAccess{ - RequestPath: "/home/wagoodman/file.txt", - Reference: &file.Reference{RealPath: "/2/real-file.txt"}, - }, - LeafLinkResolution: []file.ReferenceAccess{ + expected := &file.Resolution{ + RequestPath: "/home/wagoodman/file.txt", + Reference: &file.Reference{RealPath: "/2/real-file.txt"}, + LinkResolutions: []file.Resolution{ { RequestPath: "/place/wagoodman/file.txt", Reference: &file.Reference{RealPath: "/place/wagoodman/file.txt"}, @@ -672,12 +669,10 @@ func TestFileTree_File_MultiSymlink_deadlink(t *testing.T) { // - place/wagoodman // - place/wagoodman/file.txt -> link-to-1/file.txt -> 1/file.txt -> 2/real-file.txt - expected := &file.ReferenceAccessVia{ - ReferenceAccess: file.ReferenceAccess{ - RequestPath: "/home/wagoodman/file.txt", - Reference: &file.Reference{RealPath: "/1/file.txt"}, - }, - LeafLinkResolution: []file.ReferenceAccess{ + expected := &file.Resolution{ + RequestPath: "/home/wagoodman/file.txt", + Reference: &file.Reference{RealPath: "/1/file.txt"}, + LinkResolutions: []file.Resolution{ { RequestPath: "/place/wagoodman/file.txt", Reference: &file.Reference{RealPath: "/place/wagoodman/file.txt"}, @@ -728,7 +723,7 @@ func TestFileTree_File_Symlink(t *testing.T) { expectedExists bool // if the request path should exist or not expectedErr bool // if an error is expected from the request expectedRealRef bool // if the resolved reference should match the built reference from "buildRealPath" - expected *file.ReferenceAccessVia + expected *file.Resolution }{ /////////////////// { @@ -741,12 +736,10 @@ func TestFileTree_File_Symlink(t *testing.T) { // 
/another/place is the "real" reference that we followed, so we should expect the IDs to match upon lookup expectedRealRef: true, expectedExists: true, - expected: &file.ReferenceAccessVia{ - ReferenceAccess: file.ReferenceAccess{ - RequestPath: "/home", - Reference: &file.Reference{RealPath: "/another/place"}, - }, - LeafLinkResolution: []file.ReferenceAccess{ + expected: &file.Resolution{ + RequestPath: "/home", + Reference: &file.Reference{RealPath: "/another/place"}, + LinkResolutions: []file.Resolution{ { RequestPath: "/home", Reference: &file.Reference{RealPath: "/home"}, @@ -764,12 +757,10 @@ func TestFileTree_File_Symlink(t *testing.T) { // /home is just a symlink, not the real file (which is at /another/place)... and we've provided no symlink resolution expectedRealRef: false, expectedExists: true, - expected: &file.ReferenceAccessVia{ - ReferenceAccess: file.ReferenceAccess{ - RequestPath: "/home", - Reference: &file.Reference{RealPath: "/home"}, - }, - LeafLinkResolution: nil, + expected: &file.Resolution{ + RequestPath: "/home", + Reference: &file.Reference{RealPath: "/home"}, + LinkResolutions: nil, }, }, @@ -783,14 +774,12 @@ func TestFileTree_File_Symlink(t *testing.T) { requestPath: "/home/wagoodman", expectedExists: true, expectedRealRef: true, - expected: &file.ReferenceAccessVia{ - ReferenceAccess: file.ReferenceAccess{ - RequestPath: "/home/wagoodman", - Reference: &file.Reference{RealPath: "/another/place/wagoodman"}, - }, + expected: &file.Resolution{ + RequestPath: "/home/wagoodman", + Reference: &file.Reference{RealPath: "/another/place/wagoodman"}, // note: the request is on the leaf, which is within a symlink, but is not a symlink itself. 
// this means that all resolution is on the ancestors (thus not a link resolution on the leaf) - LeafLinkResolution: nil, + LinkResolutions: nil, }, }, { @@ -805,14 +794,12 @@ func TestFileTree_File_Symlink(t *testing.T) { // why are we seeing a result that requires link resolution but we've requested no link resolution? // because there is always ancestor link resolution by default, and this example is only via // ancestors, thus the leaf is still resolved (since it doesn't have a link). - expected: &file.ReferenceAccessVia{ - ReferenceAccess: file.ReferenceAccess{ - RequestPath: "/home/wagoodman", - Reference: &file.Reference{RealPath: "/another/place/wagoodman"}, - }, + expected: &file.Resolution{ + RequestPath: "/home/wagoodman", + Reference: &file.Reference{RealPath: "/another/place/wagoodman"}, // note: the request is on the leaf, which is within a symlink, but is not a symlink itself. // this means that all resolution is on the ancestors (thus not a link resolution on the leaf) - LeafLinkResolution: nil, + LinkResolutions: nil, }, }, @@ -826,12 +813,10 @@ func TestFileTree_File_Symlink(t *testing.T) { requestPath: "/home", expectedExists: true, expectedRealRef: true, - expected: &file.ReferenceAccessVia{ - ReferenceAccess: file.ReferenceAccess{ - RequestPath: "/home", - Reference: &file.Reference{RealPath: "/another/place"}, - }, - LeafLinkResolution: []file.ReferenceAccess{ + expected: &file.Resolution{ + RequestPath: "/home", + Reference: &file.Reference{RealPath: "/another/place"}, + LinkResolutions: []file.Resolution{ { RequestPath: "/home", Reference: &file.Reference{RealPath: "/home"}, @@ -849,12 +834,10 @@ func TestFileTree_File_Symlink(t *testing.T) { expectedExists: true, // note that since the request matches the link source and we are NOT following, we get the link ref back expectedRealRef: false, - expected: &file.ReferenceAccessVia{ - ReferenceAccess: file.ReferenceAccess{ - RequestPath: "/home", - Reference: &file.Reference{RealPath: "/home"}, 
- }, - LeafLinkResolution: nil, + expected: &file.Resolution{ + RequestPath: "/home", + Reference: &file.Reference{RealPath: "/home"}, + LinkResolutions: nil, }, }, ///////////////// @@ -867,14 +850,12 @@ func TestFileTree_File_Symlink(t *testing.T) { requestPath: "/home/wagoodman", expectedExists: true, expectedRealRef: true, - expected: &file.ReferenceAccessVia{ - ReferenceAccess: file.ReferenceAccess{ - RequestPath: "/home/wagoodman", - Reference: &file.Reference{RealPath: "/another/place/wagoodman"}, - }, + expected: &file.Resolution{ + RequestPath: "/home/wagoodman", + Reference: &file.Reference{RealPath: "/another/place/wagoodman"}, // note: the request is on the leaf, which is within a symlink, but is not a symlink itself. // (the symlink is for an ancestor... so we don't show link resolutions) - LeafLinkResolution: nil, + LinkResolutions: nil, }, }, { @@ -886,14 +867,12 @@ func TestFileTree_File_Symlink(t *testing.T) { requestPath: "/home/wagoodman", expectedExists: true, expectedRealRef: true, - expected: &file.ReferenceAccessVia{ - ReferenceAccess: file.ReferenceAccess{ - RequestPath: "/home/wagoodman", - Reference: &file.Reference{RealPath: "/another/place/wagoodman"}, - }, + expected: &file.Resolution{ + RequestPath: "/home/wagoodman", + Reference: &file.Reference{RealPath: "/another/place/wagoodman"}, // note: the request is on the leaf, which is within a symlink, but is not a symlink itself. // (the symlink is for an ancestor... 
so we don't show link resolutions) - LeafLinkResolution: nil, + LinkResolutions: nil, }, }, /////////////// @@ -905,12 +884,10 @@ func TestFileTree_File_Symlink(t *testing.T) { requestPath: "/home", // since we did not follow, the paths should exist to the symlink file expectedExists: true, - expected: &file.ReferenceAccessVia{ - ReferenceAccess: file.ReferenceAccess{ - RequestPath: "/home", - Reference: &file.Reference{RealPath: "/home"}, - }, - LeafLinkResolution: nil, + expected: &file.Resolution{ + RequestPath: "/home", + Reference: &file.Reference{RealPath: "/home"}, + LinkResolutions: nil, }, }, { @@ -931,12 +908,10 @@ func TestFileTree_File_Symlink(t *testing.T) { requestPath: "/home", // we are following the path, which goes to nowhere.... the first failed path is resolved and returned expectedExists: true, - expected: &file.ReferenceAccessVia{ - ReferenceAccess: file.ReferenceAccess{ - RequestPath: "/home", - Reference: &file.Reference{RealPath: "/home"}, - }, - LeafLinkResolution: []file.ReferenceAccess{ + expected: &file.Resolution{ + RequestPath: "/home", + Reference: &file.Reference{RealPath: "/home"}, + LinkResolutions: []file.Resolution{ { RequestPath: "/home", Reference: &file.Reference{RealPath: "/home"}, @@ -959,12 +934,10 @@ func TestFileTree_File_Symlink(t *testing.T) { requestPath: "/home/wagoodman", expectedExists: true, expectedRealRef: true, - expected: &file.ReferenceAccessVia{ - ReferenceAccess: file.ReferenceAccess{ - RequestPath: "/home/wagoodman", - Reference: &file.Reference{RealPath: "/another/place/wagoodman"}, - }, - LeafLinkResolution: nil, + expected: &file.Resolution{ + RequestPath: "/home/wagoodman", + Reference: &file.Reference{RealPath: "/another/place/wagoodman"}, + LinkResolutions: nil, }, }, { @@ -976,12 +949,10 @@ func TestFileTree_File_Symlink(t *testing.T) { requestPath: "/home/wagoodman", expectedExists: true, expectedRealRef: true, - expected: &file.ReferenceAccessVia{ - ReferenceAccess: file.ReferenceAccess{ - 
RequestPath: "/home/wagoodman", - Reference: &file.Reference{RealPath: "/another/place/wagoodman"}, - }, - LeafLinkResolution: nil, + expected: &file.Resolution{ + RequestPath: "/home/wagoodman", + Reference: &file.Reference{RealPath: "/another/place/wagoodman"}, + LinkResolutions: nil, }, }, } @@ -1162,7 +1133,7 @@ func TestFileTree_AllFiles(t *testing.T) { }, { name: "reg", - types: []file.Type{file.TypeReg}, + types: []file.Type{file.TypeRegular}, expected: []string{"/home/a-file.txt", "/sym-linked-dest/a-.gif", "/hard-linked-dest/b-.gif"}, }, { @@ -1172,17 +1143,17 @@ func TestFileTree_AllFiles(t *testing.T) { }, { name: "symlink", - types: []file.Type{file.TypeSymlink}, + types: []file.Type{file.TypeSymLink}, expected: []string{"/home/symlink"}, }, { name: "multiple", - types: []file.Type{file.TypeReg, file.TypeSymlink}, + types: []file.Type{file.TypeRegular, file.TypeSymLink}, expected: []string{"/home/a-file.txt", "/sym-linked-dest/a-.gif", "/hard-linked-dest/b-.gif", "/home/symlink"}, }, { name: "dir", - types: []file.Type{file.TypeDir}, + types: []file.Type{file.TypeDirectory}, // note: only explicitly added directories exist in the catalog expected: []string{"/home"}, }, diff --git a/pkg/filetree/glob.go b/pkg/filetree/glob.go index 06e9d204..a99ad154 100644 --- a/pkg/filetree/glob.go +++ b/pkg/filetree/glob.go @@ -243,7 +243,7 @@ func (a *fileinfoAdapter) Mode() os.FileMode { // the underlying implementation for symlinks and hardlinks share the same semantics in the tree implementation // (meaning resolution is required) where as in a real file system this is taken care of by the driver // by making the file point to the same inode as another --making the indirection transparent to applications. 
- if a.Node.FileType == file.TypeSymlink || a.Node.FileType == file.TypeHardLink { + if a.Node.FileType == file.TypeSymLink || a.Node.FileType == file.TypeHardLink { mode |= os.ModeSymlink } return mode @@ -256,7 +256,7 @@ func (a *fileinfoAdapter) ModTime() time.Time { // IsDir is an abbreviation for Mode().IsDir(). func (a *fileinfoAdapter) IsDir() bool { - return a.Node.FileType == file.TypeDir + return a.Node.FileType == file.TypeDirectory } // Sys contains underlying data source (nothing in this case). diff --git a/pkg/filetree/glob_parser.go b/pkg/filetree/glob_parser.go index 8579d2ed..5f10acbd 100644 --- a/pkg/filetree/glob_parser.go +++ b/pkg/filetree/glob_parser.go @@ -6,12 +6,23 @@ import ( ) const ( + // searchByGlob is the default, unparsed/processed glob value searched directly against the filetree. searchByGlob searchBasis = iota - searchByPath + + // searchByFullPath indicates that the given glob value is not a glob, thus a (simpler) path lookup against the filetree should be performed as the search. + searchByFullPath + + // searchByExtension indicates cases like "**/*.py" where the only specific glob element indicates the file or directory extension. searchByExtension + + // searchByBasename indicates cases like "**/bin/python" where the only specific glob element indicates the file or directory basename (e.g. "python"). searchByBasename + + // searchByBasenameGlob indicates cases like "**/bin/python*" where the search space is limited to the full set of all basenames that match the given glob. searchByBasenameGlob - searchByParentBasename + + // searchBySubDirectory indicates cases like "**/var/lib/dpkg/status.d/*" where we're interested in selecting all files within a directory (but not the directory itself). 
+ searchBySubDirectory ) type searchBasis int @@ -20,16 +31,16 @@ func (s searchBasis) String() string { switch s { case searchByGlob: return "glob" - case searchByPath: - return "path" + case searchByFullPath: + return "full-path" case searchByExtension: return "extension" case searchByBasename: return "basename" case searchByBasenameGlob: return "basename-glob" - case searchByParentBasename: - return "parent-basename" + case searchBySubDirectory: + return "subdirectory" } return "unknown search basis" } @@ -54,7 +65,7 @@ func parseGlob(glob string) []searchRequest { if !strings.ContainsAny(glob, "*?[]{}") { return []searchRequest{ { - searchBasis: searchByPath, + searchBasis: searchByFullPath, value: glob, }, } @@ -68,7 +79,7 @@ func parseGlob(glob string) []searchRequest { // special case: glob is a parent glob requests := []searchRequest{ { - searchBasis: searchByParentBasename, + searchBasis: searchBySubDirectory, value: nestedBasename, requirement: beforeBasename, }, diff --git a/pkg/filetree/glob_parser_test.go b/pkg/filetree/glob_parser_test.go index a770f61a..64ea8380 100644 --- a/pkg/filetree/glob_parser_test.go +++ b/pkg/filetree/glob_parser_test.go @@ -17,7 +17,7 @@ func Test_parseGlob(t *testing.T) { glob: "foo/bar/basename.txt", want: []searchRequest{ { - searchBasis: searchByPath, + searchBasis: searchByFullPath, value: "foo/bar/basename.txt", }, }, @@ -27,7 +27,7 @@ func Test_parseGlob(t *testing.T) { glob: "/foo/bar/basename.txt", want: []searchRequest{ { - searchBasis: searchByPath, + searchBasis: searchByFullPath, value: "/foo/bar/basename.txt", }, }, @@ -79,7 +79,7 @@ func Test_parseGlob(t *testing.T) { glob: "basename.txt", want: []searchRequest{ { - searchBasis: searchByPath, + searchBasis: searchByFullPath, value: "basename.txt", }, }, @@ -247,7 +247,7 @@ func Test_parseGlob(t *testing.T) { glob: "**/foo/bar/*", want: []searchRequest{ { - searchBasis: searchByParentBasename, + searchBasis: searchBySubDirectory, value: "bar", requirement: 
"**/foo/bar", }, @@ -259,7 +259,7 @@ func Test_parseGlob(t *testing.T) { glob: "", want: []searchRequest{ { - searchBasis: searchByPath, + searchBasis: searchByFullPath, }, }, }, @@ -268,7 +268,7 @@ func Test_parseGlob(t *testing.T) { glob: "/", want: []searchRequest{ { - searchBasis: searchByPath, + searchBasis: searchByFullPath, value: "/", }, }, @@ -278,7 +278,7 @@ func Test_parseGlob(t *testing.T) { glob: "///", want: []searchRequest{ { - searchBasis: searchByPath, + searchBasis: searchByFullPath, value: "/", }, }, diff --git a/pkg/filetree/glob_test.go b/pkg/filetree/glob_test.go index e48c7ef6..e5671561 100644 --- a/pkg/filetree/glob_test.go +++ b/pkg/filetree/glob_test.go @@ -23,21 +23,21 @@ func TestFileInfoAdapter(t *testing.T) { VirtualPath: "/home/thing.txt", Node: filenode.FileNode{ RealPath: "/home/thing.txt", - FileType: file.TypeReg, + FileType: file.TypeRegular, }, }, "/home/wagoodman": { VirtualPath: "/home/wagoodman", Node: filenode.FileNode{ RealPath: "/home/wagoodman", - FileType: file.TypeDir, + FileType: file.TypeDirectory, }, }, "/home/thing": { VirtualPath: "/home/thing", Node: filenode.FileNode{ RealPath: "/home/thing", - FileType: file.TypeSymlink, + FileType: file.TypeSymLink, LinkPath: "./thing.txt", }, }, @@ -240,16 +240,16 @@ func TestOSAdapter_ReadDir(t *testing.T) { expected: []fileinfoAdapter{ { VirtualPath: "/home/thing.txt", - Node: filenode.FileNode{RealPath: "/home/thing.txt", FileType: file.TypeReg}, + Node: filenode.FileNode{RealPath: "/home/thing.txt", FileType: file.TypeRegular}, }, { VirtualPath: "/home/wagoodman", - Node: filenode.FileNode{RealPath: "/home/wagoodman", FileType: file.TypeDir}, + Node: filenode.FileNode{RealPath: "/home/wagoodman", FileType: file.TypeDirectory}, }, { VirtualPath: "/home/thing", - Node: filenode.FileNode{RealPath: "/home/thing", FileType: file.TypeSymlink, LinkPath: "./thing.txt"}, + Node: filenode.FileNode{RealPath: "/home/thing", FileType: file.TypeSymLink, LinkPath: "./thing.txt"}, }, { 
VirtualPath: "/home/place", @@ -312,7 +312,7 @@ func TestOSAdapter_Lstat(t *testing.T) { VirtualPath: "/home", Node: filenode.FileNode{ RealPath: "/home", - FileType: file.TypeDir, + FileType: file.TypeDirectory, }, }, }, @@ -324,7 +324,7 @@ func TestOSAdapter_Lstat(t *testing.T) { VirtualPath: "/home/thing", Node: filenode.FileNode{ RealPath: "/home/thing", - FileType: file.TypeSymlink, + FileType: file.TypeSymLink, LinkPath: "./thing.txt", }, }, @@ -400,7 +400,7 @@ func TestOSAdapter_Stat(t *testing.T) { VirtualPath: "/home", Node: filenode.FileNode{ RealPath: "/home", - FileType: file.TypeDir, + FileType: file.TypeDirectory, }, }, }, @@ -413,7 +413,7 @@ func TestOSAdapter_Stat(t *testing.T) { VirtualPath: "/home/thing", Node: filenode.FileNode{ RealPath: "/home/thing.txt", - FileType: file.TypeReg, + FileType: file.TypeRegular, }, }, }, diff --git a/pkg/filetree/index_test.go b/pkg/filetree/index_test.go index e1abed49..d8cd6b91 100644 --- a/pkg/filetree/index_test.go +++ b/pkg/filetree/index_test.go @@ -23,21 +23,21 @@ func commonIndexFixture(t *testing.T) Index { ref, err := tree.AddDir(path) require.NoError(t, err, "failed to add DIR reference to index") require.NotNil(t, ref, "failed to add DIR reference to index (nil ref") - idx.Add(*ref, file.Metadata{Path: string(path), Type: file.TypeDir, IsDir: true}) + idx.Add(*ref, file.Metadata{Path: string(path), Type: file.TypeDirectory, IsDir: true}) } addFile := func(path file.Path) { ref, err := tree.AddFile(path) require.NoError(t, err, "failed to add FILE reference to index") require.NotNil(t, ref, "failed to add FILE reference to index (nil ref") - idx.Add(*ref, file.Metadata{Path: string(path), Type: file.TypeReg, MIMEType: "text/plain"}) + idx.Add(*ref, file.Metadata{Path: string(path), Type: file.TypeRegular, MIMEType: "text/plain"}) } addLink := func(from, to file.Path) { ref, err := tree.AddSymLink(from, to) require.NoError(t, err, "failed to add LINK reference to index") require.NotNil(t, ref, "failed 
to add LINK reference to index (nil ref") - idx.Add(*ref, file.Metadata{Path: string(from), LinkDestination: string(to), Type: file.TypeSymlink}) + idx.Add(*ref, file.Metadata{Path: string(from), LinkDestination: string(to), Type: file.TypeSymLink}) } // mkdir -p path/branch.d/one @@ -141,13 +141,13 @@ func TestFileCatalog_GetByFileType(t *testing.T) { }{ { name: "get real file", - input: []file.Type{file.TypeReg}, + input: []file.Type{file.TypeRegular}, want: []IndexEntry{ { Reference: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-1.txt", - Type: file.TypeReg, + Type: file.TypeRegular, MIMEType: "text/plain", }, }, @@ -155,7 +155,7 @@ func TestFileCatalog_GetByFileType(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.d"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-4.d", - Type: file.TypeReg, + Type: file.TypeRegular, MIMEType: "text/plain", }, }, @@ -163,7 +163,7 @@ func TestFileCatalog_GetByFileType(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.tar.gz"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-4.tar.gz", - Type: file.TypeReg, + Type: file.TypeRegular, MIMEType: "text/plain", }, }, @@ -171,7 +171,7 @@ func TestFileCatalog_GetByFileType(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/one/.file-4.tar.gz"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/.file-4.tar.gz", - Type: file.TypeReg, + Type: file.TypeRegular, MIMEType: "text/plain", }, }, @@ -180,7 +180,7 @@ func TestFileCatalog_GetByFileType(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/two/file-2.txt"}, Metadata: file.Metadata{ Path: "/path/branch.d/two/file-2.txt", - Type: file.TypeReg, + Type: file.TypeRegular, MIMEType: "text/plain", }, }, @@ -188,7 +188,7 @@ func TestFileCatalog_GetByFileType(t *testing.T) { Reference: file.Reference{RealPath: "/path/file-3.txt"}, Metadata: file.Metadata{ Path: 
"/path/file-3.txt", - Type: file.TypeReg, + Type: file.TypeRegular, MIMEType: "text/plain", }, }, @@ -196,13 +196,13 @@ func TestFileCatalog_GetByFileType(t *testing.T) { }, { name: "get directories", - input: []file.Type{file.TypeDir}, + input: []file.Type{file.TypeDirectory}, want: []IndexEntry{ { Reference: file.Reference{RealPath: "/path"}, Metadata: file.Metadata{ Path: "/path", - Type: file.TypeDir, + Type: file.TypeDirectory, IsDir: true, }, }, @@ -211,7 +211,7 @@ func TestFileCatalog_GetByFileType(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d"}, Metadata: file.Metadata{ Path: "/path/branch.d", - Type: file.TypeDir, + Type: file.TypeDirectory, IsDir: true, }, }, @@ -219,7 +219,7 @@ func TestFileCatalog_GetByFileType(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/one"}, Metadata: file.Metadata{ Path: "/path/branch.d/one", - Type: file.TypeDir, + Type: file.TypeDirectory, IsDir: true, }, }, @@ -227,7 +227,7 @@ func TestFileCatalog_GetByFileType(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/two"}, Metadata: file.Metadata{ Path: "/path/branch.d/two", - Type: file.TypeDir, + Type: file.TypeDirectory, IsDir: true, }, }, @@ -235,7 +235,7 @@ func TestFileCatalog_GetByFileType(t *testing.T) { Reference: file.Reference{RealPath: "/path/common"}, Metadata: file.Metadata{ Path: "/path/common", - Type: file.TypeDir, + Type: file.TypeDirectory, IsDir: true, }, }, @@ -243,14 +243,14 @@ func TestFileCatalog_GetByFileType(t *testing.T) { }, { name: "get links", - input: []file.Type{file.TypeHardLink, file.TypeSymlink}, + input: []file.Type{file.TypeHardLink, file.TypeSymLink}, want: []IndexEntry{ { Reference: file.Reference{RealPath: "/path/common/branch.d"}, Metadata: file.Metadata{ Path: "/path/common/branch.d", LinkDestination: "path/branch.d", - Type: file.TypeSymlink, + Type: file.TypeSymLink, }, }, { @@ -258,7 +258,7 @@ func TestFileCatalog_GetByFileType(t *testing.T) { Metadata: file.Metadata{ Path: 
"/path/common/branch", LinkDestination: "path/branch.d", - Type: file.TypeSymlink, + Type: file.TypeSymLink, }, }, { @@ -266,7 +266,7 @@ func TestFileCatalog_GetByFileType(t *testing.T) { Metadata: file.Metadata{ Path: "/path/common/file-4", LinkDestination: "path/branch.d/one/file-4.d", - Type: file.TypeSymlink, + Type: file.TypeSymLink, }, }, { @@ -274,14 +274,14 @@ func TestFileCatalog_GetByFileType(t *testing.T) { Metadata: file.Metadata{ Path: "/path/common/file-1.d", LinkDestination: "path/branch.d/one/file-1.txt", - Type: file.TypeSymlink, + Type: file.TypeSymLink, }, }, }, }, { name: "get non-existent types", - input: []file.Type{file.TypeBlockDevice, file.TypeCharacterDevice, file.TypeFifo, file.TypeSocket, file.TypeIrregular}, + input: []file.Type{file.TypeBlockDevice, file.TypeCharacterDevice, file.TypeFIFO, file.TypeSocket, file.TypeIrregular}, want: []IndexEntry{}, }, } @@ -323,7 +323,7 @@ func TestFileCatalog_GetByExtension(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-1.txt", - Type: file.TypeReg, + Type: file.TypeRegular, MIMEType: "text/plain", }, }, @@ -332,7 +332,7 @@ func TestFileCatalog_GetByExtension(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/two/file-2.txt"}, Metadata: file.Metadata{ Path: "/path/branch.d/two/file-2.txt", - Type: file.TypeReg, + Type: file.TypeRegular, MIMEType: "text/plain", }, }, @@ -340,7 +340,7 @@ func TestFileCatalog_GetByExtension(t *testing.T) { Reference: file.Reference{RealPath: "/path/file-3.txt"}, Metadata: file.Metadata{ Path: "/path/file-3.txt", - Type: file.TypeReg, + Type: file.TypeRegular, MIMEType: "text/plain", }, }, @@ -355,7 +355,7 @@ func TestFileCatalog_GetByExtension(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d"}, Metadata: file.Metadata{ Path: "/path/branch.d", - Type: file.TypeDir, + Type: file.TypeDirectory, IsDir: true, }, }, @@ -364,7 +364,7 @@ func 
TestFileCatalog_GetByExtension(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.d"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-4.d", - Type: file.TypeReg, + Type: file.TypeRegular, MIMEType: "text/plain", }, }, @@ -375,7 +375,7 @@ func TestFileCatalog_GetByExtension(t *testing.T) { Metadata: file.Metadata{ Path: "/path/common/branch.d", LinkDestination: "path/branch.d", - Type: file.TypeSymlink, + Type: file.TypeSymLink, }, }, { @@ -384,7 +384,7 @@ func TestFileCatalog_GetByExtension(t *testing.T) { Metadata: file.Metadata{ Path: "/path/common/file-1.d", LinkDestination: "path/branch.d/one/file-1.txt", - Type: file.TypeSymlink, + Type: file.TypeSymLink, }, }, }, @@ -397,7 +397,7 @@ func TestFileCatalog_GetByExtension(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.tar.gz"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-4.tar.gz", - Type: file.TypeReg, + Type: file.TypeRegular, MIMEType: "text/plain", }, }, @@ -405,7 +405,7 @@ func TestFileCatalog_GetByExtension(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/one/.file-4.tar.gz"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/.file-4.tar.gz", - Type: file.TypeReg, + Type: file.TypeRegular, MIMEType: "text/plain", }, }, @@ -419,7 +419,7 @@ func TestFileCatalog_GetByExtension(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.tar.gz"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-4.tar.gz", - Type: file.TypeReg, + Type: file.TypeRegular, MIMEType: "text/plain", }, }, @@ -427,7 +427,7 @@ func TestFileCatalog_GetByExtension(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/one/.file-4.tar.gz"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/.file-4.tar.gz", - Type: file.TypeReg, + Type: file.TypeRegular, MIMEType: "text/plain", }, }, @@ -477,7 +477,7 @@ func TestFileCatalog_GetByBasename(t *testing.T) { Reference: file.Reference{RealPath: 
"/path/branch.d/one/file-1.txt"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-1.txt", - Type: file.TypeReg, + Type: file.TypeRegular, MIMEType: "text/plain", }, }, @@ -496,7 +496,7 @@ func TestFileCatalog_GetByBasename(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d"}, Metadata: file.Metadata{ Path: "/path/branch.d", - Type: file.TypeDir, + Type: file.TypeDirectory, IsDir: true, }, }, @@ -505,7 +505,7 @@ func TestFileCatalog_GetByBasename(t *testing.T) { Metadata: file.Metadata{ Path: "/path/common/branch.d", LinkDestination: "path/branch.d", - Type: file.TypeSymlink, + Type: file.TypeSymLink, }, }, }, @@ -519,7 +519,7 @@ func TestFileCatalog_GetByBasename(t *testing.T) { Metadata: file.Metadata{ Path: "/path/common/file-1.d", LinkDestination: "path/branch.d/one/file-1.txt", - Type: file.TypeSymlink, + Type: file.TypeSymLink, }, }, }, @@ -569,14 +569,14 @@ func TestFileCatalog_GetByBasenameGlob(t *testing.T) { Metadata: file.Metadata{ Path: "/path/common/file-1.d", LinkDestination: "path/branch.d/one/file-1.txt", - Type: file.TypeSymlink, + Type: file.TypeSymLink, }, }, { Reference: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-1.txt", - Type: file.TypeReg, + Type: file.TypeRegular, MIMEType: "text/plain", }, }, @@ -595,7 +595,7 @@ func TestFileCatalog_GetByBasenameGlob(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d"}, Metadata: file.Metadata{ Path: "/path/branch.d", - Type: file.TypeDir, + Type: file.TypeDirectory, IsDir: true, }, }, @@ -604,7 +604,7 @@ func TestFileCatalog_GetByBasenameGlob(t *testing.T) { Metadata: file.Metadata{ Path: "/path/common/branch.d", LinkDestination: "path/branch.d", - Type: file.TypeSymlink, + Type: file.TypeSymLink, }, }, }, @@ -618,7 +618,7 @@ func TestFileCatalog_GetByBasenameGlob(t *testing.T) { Metadata: file.Metadata{ Path: "/path/common/file-1.d", LinkDestination: "path/branch.d/one/file-1.txt", - Type: 
file.TypeSymlink, + Type: file.TypeSymLink, }, }, }, @@ -667,7 +667,7 @@ func TestFileCatalog_GetByMimeType(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-1.txt", - Type: file.TypeReg, + Type: file.TypeRegular, MIMEType: "text/plain", }, }, @@ -675,7 +675,7 @@ func TestFileCatalog_GetByMimeType(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.d"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-4.d", - Type: file.TypeReg, + Type: file.TypeRegular, MIMEType: "text/plain", }, }, @@ -683,7 +683,7 @@ func TestFileCatalog_GetByMimeType(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.tar.gz"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-4.tar.gz", - Type: file.TypeReg, + Type: file.TypeRegular, MIMEType: "text/plain", }, }, @@ -691,7 +691,7 @@ func TestFileCatalog_GetByMimeType(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/one/.file-4.tar.gz"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/.file-4.tar.gz", - Type: file.TypeReg, + Type: file.TypeRegular, MIMEType: "text/plain", }, }, @@ -699,7 +699,7 @@ func TestFileCatalog_GetByMimeType(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/two/file-2.txt"}, Metadata: file.Metadata{ Path: "/path/branch.d/two/file-2.txt", - Type: file.TypeReg, + Type: file.TypeRegular, MIMEType: "text/plain", }, }, @@ -707,7 +707,7 @@ func TestFileCatalog_GetByMimeType(t *testing.T) { Reference: file.Reference{RealPath: "/path/file-3.txt"}, Metadata: file.Metadata{ Path: "/path/file-3.txt", - Type: file.TypeReg, + Type: file.TypeRegular, MIMEType: "text/plain", }, }, diff --git a/pkg/filetree/interfaces.go b/pkg/filetree/interfaces.go index 851c0853..9d813fc4 100644 --- a/pkg/filetree/interfaces.go +++ b/pkg/filetree/interfaces.go @@ -20,8 +20,8 @@ type Reader interface { } type PathReader interface { - File(path file.Path, 
options ...LinkResolutionOption) (bool, *file.ReferenceAccessVia, error) - FilesByGlob(query string, options ...LinkResolutionOption) ([]file.ReferenceAccessVia, error) + File(path file.Path, options ...LinkResolutionOption) (bool, *file.Resolution, error) + FilesByGlob(query string, options ...LinkResolutionOption) ([]file.Resolution, error) AllRealPaths() []file.Path ListPaths(dir file.Path) ([]file.Path, error) HasPath(path file.Path, options ...LinkResolutionOption) bool diff --git a/pkg/filetree/node_access.go b/pkg/filetree/node_access.go index ec7e752f..dda333bd 100644 --- a/pkg/filetree/node_access.go +++ b/pkg/filetree/node_access.go @@ -19,14 +19,14 @@ func (na *nodeAccess) HasFileNode() bool { return na.FileNode != nil } -func (na *nodeAccess) FileReferenceVia() *file.ReferenceAccessVia { +func (na *nodeAccess) FileResolution() *file.Resolution { if !na.HasFileNode() { return nil } - return file.NewFileReferenceVia( + return file.NewResolution( na.RequestPath, na.FileNode.Reference, - newReferenceAccessPath(na.LeafLinkResolution), + newResolutions(na.LeafLinkResolution), ) } diff --git a/pkg/filetree/search.go b/pkg/filetree/search.go index 823df8f4..0e35cb30 100644 --- a/pkg/filetree/search.go +++ b/pkg/filetree/search.go @@ -15,9 +15,9 @@ import ( // Searcher is a facade for searching a file tree with optional indexing support. 
type Searcher interface { - SearchByPath(path string, options ...LinkResolutionOption) (*file.ReferenceAccessVia, error) - SearchByGlob(patterns string, options ...LinkResolutionOption) ([]file.ReferenceAccessVia, error) - SearchByMIMEType(mimeTypes ...string) ([]file.ReferenceAccessVia, error) + SearchByPath(path string, options ...LinkResolutionOption) (*file.Resolution, error) + SearchByGlob(patterns string, options ...LinkResolutionOption) ([]file.Resolution, error) + SearchByMIMEType(mimeTypes ...string) ([]file.Resolution, error) } type searchContext struct { @@ -43,7 +43,7 @@ func NewSearchContext(tree Reader, index IndexReader) Searcher { } func (sc *searchContext) buildLinkResolutionIndex() error { - entries, err := sc.index.GetByFileType(file.TypeSymlink, file.TypeHardLink) + entries, err := sc.index.GetByFileType(file.TypeSymLink, file.TypeHardLink) if err != nil { return err } @@ -80,7 +80,7 @@ func (sc *searchContext) buildLinkResolutionIndex() error { return nil } -func (sc searchContext) SearchByPath(path string, options ...LinkResolutionOption) (*file.ReferenceAccessVia, error) { +func (sc searchContext) SearchByPath(path string, options ...LinkResolutionOption) (*file.Resolution, error) { // TODO: one day this could leverage indexes outside of the tree, but today this is not implemented log.WithFields("path", path).Trace("searching filetree by path") @@ -89,7 +89,7 @@ func (sc searchContext) SearchByPath(path string, options ...LinkResolutionOptio return ref, err } -func (sc searchContext) SearchByMIMEType(mimeTypes ...string) ([]file.ReferenceAccessVia, error) { +func (sc searchContext) SearchByMIMEType(mimeTypes ...string) ([]file.Resolution, error) { log.WithFields("types", mimeTypes).Trace("searching filetree by MIME types") var fileEntries []IndexEntry @@ -107,14 +107,14 @@ func (sc searchContext) SearchByMIMEType(mimeTypes ...string) ([]file.ReferenceA return nil, err } - sort.Sort(file.ReferenceAccessVias(refs)) + 
sort.Sort(file.Resolutions(refs)) return refs, nil } // add case for status.d/* like things that hook up directly into filetree.ListPaths() -func (sc searchContext) SearchByGlob(pattern string, options ...LinkResolutionOption) ([]file.ReferenceAccessVia, error) { +func (sc searchContext) SearchByGlob(pattern string, options ...LinkResolutionOption) ([]file.Resolution, error) { log.WithFields("glob", pattern).Trace("searching filetree by glob") if sc.index == nil { @@ -123,27 +123,27 @@ func (sc searchContext) SearchByGlob(pattern string, options ...LinkResolutionOp if err != nil { return nil, fmt.Errorf("unable to search by glob=%q: %w", pattern, err) } - sort.Sort(file.ReferenceAccessVias(refs)) + sort.Sort(file.Resolutions(refs)) return refs, nil } - var allRefs []file.ReferenceAccessVia + var allRefs []file.Resolution for _, request := range parseGlob(pattern) { - refs, err := sc.searchByGlob(request, options...) + refs, err := sc.searchByRequest(request, options...) if err != nil { return nil, fmt.Errorf("unable to search by glob=%q: %w", pattern, err) } allRefs = append(allRefs, refs...) } - sort.Sort(file.ReferenceAccessVias(allRefs)) + sort.Sort(file.Resolutions(allRefs)) return allRefs, nil } -func (sc searchContext) searchByGlob(request searchRequest, options ...LinkResolutionOption) ([]file.ReferenceAccessVia, error) { +func (sc searchContext) searchByRequest(request searchRequest, options ...LinkResolutionOption) ([]file.Resolution, error) { switch request.searchBasis { - case searchByPath: + case searchByFullPath: options = append(options, FollowBasenameLinks) ref, err := sc.SearchByPath(request.value, options...) 
if err != nil { @@ -152,7 +152,7 @@ func (sc searchContext) searchByGlob(request searchRequest, options ...LinkResol if ref == nil { return nil, nil } - return []file.ReferenceAccessVia{*ref}, nil + return []file.Resolution{*ref}, nil case searchByBasename: indexes, err := sc.index.GetByBasename(request.value) if err != nil { @@ -183,7 +183,7 @@ func (sc searchContext) searchByGlob(request searchRequest, options ...LinkResol return nil, err } return refs, nil - case searchByParentBasename: + case searchBySubDirectory: return sc.searchByParentBasename(request) case searchByGlob: @@ -196,7 +196,7 @@ func (sc searchContext) searchByGlob(request searchRequest, options ...LinkResol return nil, fmt.Errorf("invalid search request: %+v", request.searchBasis) } -func (sc searchContext) searchByParentBasename(request searchRequest) ([]file.ReferenceAccessVia, error) { +func (sc searchContext) searchByParentBasename(request searchRequest) ([]file.Resolution, error) { indexes, err := sc.index.GetByBasename(request.value) if err != nil { return nil, fmt.Errorf("unable to search by extension=%q: %w", request.value, err) @@ -206,7 +206,7 @@ func (sc searchContext) searchByParentBasename(request searchRequest) ([]file.Re return nil, err } - var results []file.ReferenceAccessVia + var results []file.Resolution for _, ref := range refs { paths, err := sc.tree.ListPaths(ref.RequestPath) if err != nil { @@ -236,7 +236,7 @@ func (sc searchContext) searchByParentBasename(request searchRequest) ([]file.Re return results, nil } -func (sc searchContext) referencesWithRequirement(requirement string, entries []IndexEntry) ([]file.ReferenceAccessVia, error) { +func (sc searchContext) referencesWithRequirement(requirement string, entries []IndexEntry) ([]file.Resolution, error) { refs, err := sc.referencesInTree(entries) if err != nil { return nil, err @@ -246,7 +246,7 @@ func (sc searchContext) referencesWithRequirement(requirement string, entries [] return refs, nil } - var results 
[]file.ReferenceAccessVia + var results []file.Resolution for _, ref := range refs { matches, err := matchesRequirement(ref, requirement) if err != nil { @@ -260,7 +260,7 @@ func (sc searchContext) referencesWithRequirement(requirement string, entries [] return results, nil } -func matchesRequirement(ref file.ReferenceAccessVia, requirement string) (bool, error) { +func matchesRequirement(ref file.Resolution, requirement string) (bool, error) { allRefPaths := ref.AllRequestPaths() for _, p := range allRefPaths { matched, err := doublestar.Match(requirement, string(p)) @@ -433,8 +433,8 @@ allFileEntries: // referencesInTree does two things relative to the index entries given: // 1) it expands the index entries to include all possible access paths to the file node (by considering all possible link resolutions) // 2) it filters the index entries to only include those that exist in the tree -func (sc searchContext) referencesInTree(fileEntries []IndexEntry) ([]file.ReferenceAccessVia, error) { - var refs []file.ReferenceAccessVia +func (sc searchContext) referencesInTree(fileEntries []IndexEntry) ([]file.Resolution, error) { + var refs []file.Resolution for _, entry := range fileEntries { na, err := sc.tree.file(entry.Reference.RealPath, FollowBasenameLinks) @@ -448,7 +448,7 @@ func (sc searchContext) referencesInTree(fileEntries []IndexEntry) ([]file.Refer } // expand the index results with more possible access paths from the link resolution cache - var expandedRefs []file.ReferenceAccessVia + var expandedRefs []file.Resolution allPathsToNode, err := sc.allPathsToNode(na.FileNode) if err != nil { return nil, fmt.Errorf("unable to get all paths to node for path=%q: %w", entry.Reference.RealPath, err) diff --git a/pkg/filetree/search_test.go b/pkg/filetree/search_test.go index 30aadc3f..82d6957c 100644 --- a/pkg/filetree/search_test.go +++ b/pkg/filetree/search_test.go @@ -38,7 +38,7 @@ func Test_searchContext_SearchByPath(t *testing.T) { name string fields fields args 
args - want *file.ReferenceAccessVia + want *file.Resolution wantErr require.ErrorAssertionFunc }{ { @@ -47,12 +47,10 @@ func Test_searchContext_SearchByPath(t *testing.T) { args: args{ path: "/path/to/file.txt", }, - want: &file.ReferenceAccessVia{ - ReferenceAccess: file.ReferenceAccess{ - RequestPath: "/path/to/file.txt", - Reference: &file.Reference{ - RealPath: "/path/to/file.txt", - }, + want: &file.Resolution{ + RequestPath: "/path/to/file.txt", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", }, }, }, @@ -123,11 +121,11 @@ func Test_searchContext_SearchByGlob(t *testing.T) { require.NotNil(t, toRef) idx := NewIndex() - idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeReg}) - idx.Add(*linkToFileRef, file.Metadata{Type: file.TypeSymlink}) - idx.Add(*linkToPathRef, file.Metadata{Type: file.TypeSymlink}) - idx.Add(*doubleLinkToPathRef, file.Metadata{Type: file.TypeSymlink}) - idx.Add(*toRef, file.Metadata{Type: file.TypeDir}) + idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeRegular}) + idx.Add(*linkToFileRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*linkToPathRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*doubleLinkToPathRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*toRef, file.Metadata{Type: file.TypeDirectory}) defaultFields := fields{ tree: tree, @@ -138,7 +136,7 @@ func Test_searchContext_SearchByGlob(t *testing.T) { name string fields fields args args - want []file.ReferenceAccessVia + want []file.Resolution wantErr require.ErrorAssertionFunc }{ { @@ -149,29 +147,26 @@ func Test_searchContext_SearchByGlob(t *testing.T) { }, // note: result "/link-to-file" resolves to the file but does not show up since the request path // does not match the requirement glob - want: []file.ReferenceAccessVia{ + want: []file.Resolution{ { - ReferenceAccess: file.ReferenceAccess{ - RequestPath: "/path/to/file.txt", - Reference: &file.Reference{ - RealPath: "/path/to/file.txt", - }, + + 
RequestPath: "/path/to/file.txt", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", }, }, { - ReferenceAccess: file.ReferenceAccess{ - RequestPath: "/double-link-to-path/to/file.txt", - Reference: &file.Reference{ - RealPath: "/path/to/file.txt", - }, + + RequestPath: "/double-link-to-path/to/file.txt", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", }, }, { - ReferenceAccess: file.ReferenceAccess{ - RequestPath: "/link-to-path/to/file.txt", - Reference: &file.Reference{ - RealPath: "/path/to/file.txt", - }, + + RequestPath: "/link-to-path/to/file.txt", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", }, }, }, @@ -185,13 +180,11 @@ func Test_searchContext_SearchByGlob(t *testing.T) { // dodge any ancestor symlink and will not find the file. glob: "**/link-to-path/to/file.txt", }, - want: []file.ReferenceAccessVia{ + want: []file.Resolution{ { - ReferenceAccess: file.ReferenceAccess{ - RequestPath: "/link-to-path/to/file.txt", - Reference: &file.Reference{ - RealPath: "/path/to/file.txt", - }, + RequestPath: "/link-to-path/to/file.txt", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", }, }, }, @@ -202,13 +195,11 @@ func Test_searchContext_SearchByGlob(t *testing.T) { args: args{ glob: "**/path/to/*", }, - want: []file.ReferenceAccessVia{ + want: []file.Resolution{ { - ReferenceAccess: file.ReferenceAccess{ - RequestPath: "/path/to/file.txt", - Reference: &file.Reference{ - RealPath: "/path/to/file.txt", - }, + RequestPath: "/path/to/file.txt", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", }, }, }, @@ -219,13 +210,11 @@ func Test_searchContext_SearchByGlob(t *testing.T) { args: args{ glob: "/path/to/*", }, - want: []file.ReferenceAccessVia{ + want: []file.Resolution{ { - ReferenceAccess: file.ReferenceAccess{ - RequestPath: "/path/to/file.txt", - Reference: &file.Reference{ - RealPath: "/path/to/file.txt", - }, + RequestPath: "/path/to/file.txt", + Reference: &file.Reference{ + RealPath: 
"/path/to/file.txt", }, }, }, @@ -236,13 +225,11 @@ func Test_searchContext_SearchByGlob(t *testing.T) { args: args{ glob: "**/link-to-path/to/*", }, - want: []file.ReferenceAccessVia{ + want: []file.Resolution{ { - ReferenceAccess: file.ReferenceAccess{ - RequestPath: "/link-to-path/to/file.txt", - Reference: &file.Reference{ - RealPath: "/path/to/file.txt", - }, + RequestPath: "/link-to-path/to/file.txt", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", }, }, }, @@ -256,13 +243,11 @@ func Test_searchContext_SearchByGlob(t *testing.T) { // dodge any ancestor symlink and will not find the file. glob: "**/double-link-to-path/to/file.txt", }, - want: []file.ReferenceAccessVia{ + want: []file.Resolution{ { - ReferenceAccess: file.ReferenceAccess{ - RequestPath: "/double-link-to-path/to/file.txt", - Reference: &file.Reference{ - RealPath: "/path/to/file.txt", - }, + RequestPath: "/double-link-to-path/to/file.txt", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", }, }, }, @@ -273,15 +258,13 @@ func Test_searchContext_SearchByGlob(t *testing.T) { args: args{ glob: "**/link-to-file", }, - want: []file.ReferenceAccessVia{ + want: []file.Resolution{ { - ReferenceAccess: file.ReferenceAccess{ - RequestPath: "/link-to-file", - Reference: &file.Reference{ - RealPath: "/path/to/file.txt", - }, + RequestPath: "/link-to-file", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", }, - LeafLinkResolution: []file.ReferenceAccess{ + LinkResolutions: []file.Resolution{ { RequestPath: "/link-to-file", Reference: &file.Reference{ @@ -301,13 +284,11 @@ func Test_searchContext_SearchByGlob(t *testing.T) { // dodge any ancestor symlink and will not find the file. 
glob: "**/link-to-path/to/file.txt", }, - want: []file.ReferenceAccessVia{ + want: []file.Resolution{ { - ReferenceAccess: file.ReferenceAccess{ - RequestPath: "/link-to-path/to/file.txt", - Reference: &file.Reference{ - RealPath: "/path/to/file.txt", - }, + RequestPath: "/link-to-path/to/file.txt", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", }, }, }, @@ -321,30 +302,26 @@ func Test_searchContext_SearchByGlob(t *testing.T) { // dodge any ancestor symlink and will not find the file. glob: "**/*.txt", }, - want: []file.ReferenceAccessVia{ + want: []file.Resolution{ { - ReferenceAccess: file.ReferenceAccess{ - RequestPath: "/path/to/file.txt", - Reference: &file.Reference{RealPath: "/path/to/file.txt"}, - }, + RequestPath: "/path/to/file.txt", + Reference: &file.Reference{RealPath: "/path/to/file.txt"}, }, { - ReferenceAccess: file.ReferenceAccess{ - RequestPath: "/double-link-to-path/to/file.txt", - Reference: &file.Reference{ - RealPath: "/path/to/file.txt", - }, + RequestPath: "/double-link-to-path/to/file.txt", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", }, }, // note: this is NOT expected since the input glob does not match against the request path //{ - // ReferenceAccess: file.ReferenceAccess{ + // Resolution: file.Resolution{ // RequestPath: "/link-to-file", // Reference: &file.Reference{ // RealPath: "/path/to/file.txt", // }, // }, - // LeafLinkResolution: []file.ReferenceAccess{ + // LinkResolutions: []file.Resolution{ // { // RequestPath: "/link-to-file", // Reference: &file.Reference{RealPath: "/link-to-file"}, @@ -352,11 +329,9 @@ func Test_searchContext_SearchByGlob(t *testing.T) { // }, //}, { - ReferenceAccess: file.ReferenceAccess{ - RequestPath: "/link-to-path/to/file.txt", - Reference: &file.Reference{ - RealPath: "/path/to/file.txt", - }, + RequestPath: "/link-to-path/to/file.txt", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", }, }, }, @@ -419,7 +394,7 @@ func 
Test_searchContext_SearchByMIMEType(t *testing.T) { name string fields fields args args - want []file.ReferenceAccessVia + want []file.Resolution wantErr require.ErrorAssertionFunc }{ { @@ -428,13 +403,11 @@ func Test_searchContext_SearchByMIMEType(t *testing.T) { args: args{ mimeTypes: "plain/text", }, - want: []file.ReferenceAccessVia{ + want: []file.Resolution{ { - ReferenceAccess: file.ReferenceAccess{ - RequestPath: "/path/to/file.txt", - Reference: &file.Reference{ - RealPath: "/path/to/file.txt", - }, + RequestPath: "/path/to/file.txt", + Reference: &file.Reference{ + RealPath: "/path/to/file.txt", }, }, }, @@ -499,7 +472,7 @@ func Test_searchContext_allPathsToNode(t *testing.T) { require.NotNil(t, fileRef) idx := NewIndex() - idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeReg}) + idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeRegular}) na, err := tree.node("/path/to", linkResolutionStrategy{ FollowAncestorLinks: false, @@ -534,8 +507,8 @@ func Test_searchContext_allPathsToNode(t *testing.T) { require.NotNil(t, fileRef) idx := NewIndex() - idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeReg}) - idx.Add(*deafLinkRef, file.Metadata{Type: file.TypeSymlink}) + idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeRegular}) + idx.Add(*deafLinkRef, file.Metadata{Type: file.TypeSymLink}) na, err := tree.node(fileRef.RealPath, linkResolutionStrategy{ FollowAncestorLinks: false, @@ -576,9 +549,9 @@ func Test_searchContext_allPathsToNode(t *testing.T) { require.NotNil(t, link3) idx := NewIndex() - idx.Add(*link1, file.Metadata{Type: file.TypeSymlink}) - idx.Add(*link2, file.Metadata{Type: file.TypeSymlink}) - idx.Add(*link3, file.Metadata{Type: file.TypeSymlink}) + idx.Add(*link1, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*link2, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*link3, file.Metadata{Type: file.TypeSymLink}) na, err := tree.node(link1.RealPath, 
linkResolutionStrategy{ FollowAncestorLinks: false, @@ -628,11 +601,11 @@ func Test_searchContext_allPathsToNode(t *testing.T) { require.NotNil(t, x11LinkRef) idx := NewIndex() - idx.Add(*usrRef, file.Metadata{Type: file.TypeDir}) - idx.Add(*usrBinRef, file.Metadata{Type: file.TypeDir}) - idx.Add(*binLinkRef, file.Metadata{Type: file.TypeSymlink}) - idx.Add(*x11LinkRef, file.Metadata{Type: file.TypeSymlink}) - idx.Add(*ttydRef, file.Metadata{Type: file.TypeReg}) + idx.Add(*usrRef, file.Metadata{Type: file.TypeDirectory}) + idx.Add(*usrBinRef, file.Metadata{Type: file.TypeDirectory}) + idx.Add(*binLinkRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*x11LinkRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*ttydRef, file.Metadata{Type: file.TypeRegular}) na, err := tree.node(ttydRef.RealPath, linkResolutionStrategy{ FollowAncestorLinks: false, @@ -668,8 +641,8 @@ func Test_searchContext_allPathsToNode(t *testing.T) { require.NotNil(t, fileRef) idx := NewIndex() - idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeReg}) - idx.Add(*linkToFileRef, file.Metadata{Type: file.TypeSymlink}) + idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeRegular}) + idx.Add(*linkToFileRef, file.Metadata{Type: file.TypeSymLink}) na, err := tree.node(fileRef.RealPath, linkResolutionStrategy{ FollowAncestorLinks: false, @@ -710,9 +683,9 @@ func Test_searchContext_allPathsToNode(t *testing.T) { require.NotNil(t, fileRef) idx := NewIndex() - idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeReg}) - idx.Add(*linkToFileRef, file.Metadata{Type: file.TypeSymlink}) - idx.Add(*doubleLinkToFileRef, file.Metadata{Type: file.TypeSymlink}) + idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeRegular}) + idx.Add(*linkToFileRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*doubleLinkToFileRef, file.Metadata{Type: file.TypeSymLink}) na, err := tree.node(fileRef.RealPath, linkResolutionStrategy{ 
FollowAncestorLinks: false, @@ -752,9 +725,9 @@ func Test_searchContext_allPathsToNode(t *testing.T) { require.NotNil(t, fileRef) idx := NewIndex() - idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeReg}) - idx.Add(*linkToToRef, file.Metadata{Type: file.TypeSymlink}) - idx.Add(*dirTo, file.Metadata{Type: file.TypeDir}) + idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeRegular}) + idx.Add(*linkToToRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*dirTo, file.Metadata{Type: file.TypeDirectory}) na, err := tree.node(fileRef.RealPath, linkResolutionStrategy{ FollowAncestorLinks: false, @@ -799,10 +772,10 @@ func Test_searchContext_allPathsToNode(t *testing.T) { require.NotNil(t, fileRef) idx := NewIndex() - idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeReg}) - idx.Add(*linkToToRef, file.Metadata{Type: file.TypeSymlink}) - idx.Add(*linkToPathRef, file.Metadata{Type: file.TypeSymlink}) - idx.Add(*dirTo, file.Metadata{Type: file.TypeDir}) + idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeRegular}) + idx.Add(*linkToToRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*linkToPathRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*dirTo, file.Metadata{Type: file.TypeDirectory}) na, err := tree.node(fileRef.RealPath, linkResolutionStrategy{ FollowAncestorLinks: false, @@ -857,12 +830,12 @@ func Test_searchContext_allPathsToNode(t *testing.T) { require.NotNil(t, fileRef) idx := NewIndex() - idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeReg}) - idx.Add(*linkToToRef, file.Metadata{Type: file.TypeSymlink}) - idx.Add(*linkToPathRef, file.Metadata{Type: file.TypeSymlink}) - idx.Add(*anotherLinkToPathRef, file.Metadata{Type: file.TypeSymlink}) - idx.Add(*anotherLinkToToRef, file.Metadata{Type: file.TypeSymlink}) - idx.Add(*dirTo, file.Metadata{Type: file.TypeDir}) + idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeRegular}) + 
idx.Add(*linkToToRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*linkToPathRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*anotherLinkToPathRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*anotherLinkToToRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*dirTo, file.Metadata{Type: file.TypeDirectory}) na, err := tree.node(fileRef.RealPath, linkResolutionStrategy{ FollowAncestorLinks: false, @@ -924,13 +897,13 @@ func Test_searchContext_allPathsToNode(t *testing.T) { require.NotNil(t, fileRef) idx := NewIndex() - idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeReg}) - idx.Add(*linkToAnotherViaLinkRef, file.Metadata{Type: file.TypeSymlink}) - idx.Add(*linkToPathRef, file.Metadata{Type: file.TypeSymlink}) - idx.Add(*linkToToRef, file.Metadata{Type: file.TypeSymlink}) - idx.Add(*pathToLinkToFileRef, file.Metadata{Type: file.TypeSymlink}) - idx.Add(*dirTo, file.Metadata{Type: file.TypeDir}) - idx.Add(*dirAnother, file.Metadata{Type: file.TypeDir}) + idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeRegular}) + idx.Add(*linkToAnotherViaLinkRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*linkToPathRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*linkToToRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*pathToLinkToFileRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*dirTo, file.Metadata{Type: file.TypeDirectory}) + idx.Add(*dirAnother, file.Metadata{Type: file.TypeDirectory}) na, err := tree.node(fileRef.RealPath, linkResolutionStrategy{ FollowAncestorLinks: false, @@ -992,13 +965,13 @@ func Test_searchContext_allPathsToNode(t *testing.T) { require.NotNil(t, fileRef) idx := NewIndex() - idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeReg}) - idx.Add(*linkToAnotherViaLinkRef, file.Metadata{Type: file.TypeSymlink}) - idx.Add(*linkToPathRef, file.Metadata{Type: file.TypeSymlink}) - idx.Add(*linkToToRef, file.Metadata{Type: file.TypeSymlink}) - 
idx.Add(*pathToLinkToFileRef, file.Metadata{Type: file.TypeSymlink}) - idx.Add(*dirTo, file.Metadata{Type: file.TypeDir}) - idx.Add(*dirAnother, file.Metadata{Type: file.TypeDir}) + idx.Add(*fileRef, file.Metadata{MIMEType: "plain/text", Type: file.TypeRegular}) + idx.Add(*linkToAnotherViaLinkRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*linkToPathRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*linkToToRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*pathToLinkToFileRef, file.Metadata{Type: file.TypeSymLink}) + idx.Add(*dirTo, file.Metadata{Type: file.TypeDirectory}) + idx.Add(*dirAnother, file.Metadata{Type: file.TypeDirectory}) na, err := tree.node(fileRef.RealPath, linkResolutionStrategy{ FollowAncestorLinks: false, diff --git a/pkg/image/file_catalog_test.go b/pkg/image/file_catalog_test.go index 14af105c..a6100156 100644 --- a/pkg/image/file_catalog_test.go +++ b/pkg/image/file_catalog_test.go @@ -230,7 +230,7 @@ func TestFileCatalog_GetByExtension(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-1.txt", - Type: file.TypeReg, + Type: file.TypeRegular, MIMEType: "text/plain", }, }, @@ -239,7 +239,7 @@ func TestFileCatalog_GetByExtension(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/two/file-2.txt"}, Metadata: file.Metadata{ Path: "/path/branch.d/two/file-2.txt", - Type: file.TypeReg, + Type: file.TypeRegular, MIMEType: "text/plain", }, }, @@ -247,7 +247,7 @@ func TestFileCatalog_GetByExtension(t *testing.T) { Reference: file.Reference{RealPath: "/path/file-3.txt"}, Metadata: file.Metadata{ Path: "/path/file-3.txt", - Type: file.TypeReg, + Type: file.TypeRegular, MIMEType: "text/plain", }, }, @@ -262,7 +262,7 @@ func TestFileCatalog_GetByExtension(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d"}, Metadata: file.Metadata{ Path: "/path/branch.d", - Type: file.TypeDir, + Type: file.TypeDirectory, IsDir: true, 
}, }, @@ -271,7 +271,7 @@ func TestFileCatalog_GetByExtension(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.d"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-4.d", - Type: file.TypeReg, + Type: file.TypeRegular, MIMEType: "text/plain", }, }, @@ -282,7 +282,7 @@ func TestFileCatalog_GetByExtension(t *testing.T) { Metadata: file.Metadata{ Path: "/path/common/branch.d", LinkDestination: "path/branch.d", - Type: file.TypeSymlink, + Type: file.TypeSymLink, }, }, { @@ -291,7 +291,7 @@ func TestFileCatalog_GetByExtension(t *testing.T) { Metadata: file.Metadata{ Path: "/path/common/file-1.d", LinkDestination: "path/branch.d/one/file-1.txt", - Type: file.TypeSymlink, + Type: file.TypeSymLink, }, }, }, @@ -304,7 +304,7 @@ func TestFileCatalog_GetByExtension(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/one/.file-4.tar.gz"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/.file-4.tar.gz", - Type: file.TypeReg, + Type: file.TypeRegular, MIMEType: "text/plain", }, }, @@ -312,7 +312,7 @@ func TestFileCatalog_GetByExtension(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.tar.gz"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-4.tar.gz", - Type: file.TypeReg, + Type: file.TypeRegular, MIMEType: "text/plain", }, }, @@ -326,7 +326,7 @@ func TestFileCatalog_GetByExtension(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/one/.file-4.tar.gz"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/.file-4.tar.gz", - Type: file.TypeReg, + Type: file.TypeRegular, MIMEType: "text/plain", }, }, @@ -334,7 +334,7 @@ func TestFileCatalog_GetByExtension(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.tar.gz"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-4.tar.gz", - Type: file.TypeReg, + Type: file.TypeRegular, MIMEType: "text/plain", }, }, @@ -395,7 +395,7 @@ func TestFileCatalog_GetByBasename(t *testing.T) { Reference: 
file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-1.txt", - Type: file.TypeReg, + Type: file.TypeRegular, MIMEType: "text/plain", }, }, @@ -414,7 +414,7 @@ func TestFileCatalog_GetByBasename(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d"}, Metadata: file.Metadata{ Path: "/path/branch.d", - Type: file.TypeDir, + Type: file.TypeDirectory, IsDir: true, }, }, @@ -423,7 +423,7 @@ func TestFileCatalog_GetByBasename(t *testing.T) { Metadata: file.Metadata{ Path: "/path/common/branch.d", LinkDestination: "path/branch.d", - Type: file.TypeSymlink, + Type: file.TypeSymLink, }, }, }, @@ -437,7 +437,7 @@ func TestFileCatalog_GetByBasename(t *testing.T) { Metadata: file.Metadata{ Path: "/path/common/file-1.d", LinkDestination: "path/branch.d/one/file-1.txt", - Type: file.TypeSymlink, + Type: file.TypeSymLink, }, }, }, @@ -498,14 +498,14 @@ func TestFileCatalog_GetByBasenameGlob(t *testing.T) { Metadata: file.Metadata{ Path: "/path/common/file-1.d", LinkDestination: "path/branch.d/one/file-1.txt", - Type: file.TypeSymlink, + Type: file.TypeSymLink, }, }, { Reference: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-1.txt", - Type: file.TypeReg, + Type: file.TypeRegular, MIMEType: "text/plain", }, }, @@ -524,7 +524,7 @@ func TestFileCatalog_GetByBasenameGlob(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d"}, Metadata: file.Metadata{ Path: "/path/branch.d", - Type: file.TypeDir, + Type: file.TypeDirectory, IsDir: true, }, }, @@ -533,7 +533,7 @@ func TestFileCatalog_GetByBasenameGlob(t *testing.T) { Metadata: file.Metadata{ Path: "/path/common/branch.d", LinkDestination: "path/branch.d", - Type: file.TypeSymlink, + Type: file.TypeSymLink, }, }, }, @@ -547,7 +547,7 @@ func TestFileCatalog_GetByBasenameGlob(t *testing.T) { Metadata: file.Metadata{ Path: "/path/common/file-1.d", LinkDestination: 
"path/branch.d/one/file-1.txt", - Type: file.TypeSymlink, + Type: file.TypeSymLink, }, }, }, @@ -607,7 +607,7 @@ func TestFileCatalog_GetByMimeType(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/one/.file-4.tar.gz"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/.file-4.tar.gz", - Type: file.TypeReg, + Type: file.TypeRegular, MIMEType: "text/plain", }, }, @@ -615,7 +615,7 @@ func TestFileCatalog_GetByMimeType(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/one/file-1.txt"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-1.txt", - Type: file.TypeReg, + Type: file.TypeRegular, MIMEType: "text/plain", }, }, @@ -623,7 +623,7 @@ func TestFileCatalog_GetByMimeType(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.d"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-4.d", - Type: file.TypeReg, + Type: file.TypeRegular, MIMEType: "text/plain", }, }, @@ -631,7 +631,7 @@ func TestFileCatalog_GetByMimeType(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/one/file-4.tar.gz"}, Metadata: file.Metadata{ Path: "/path/branch.d/one/file-4.tar.gz", - Type: file.TypeReg, + Type: file.TypeRegular, MIMEType: "text/plain", }, }, @@ -639,7 +639,7 @@ func TestFileCatalog_GetByMimeType(t *testing.T) { Reference: file.Reference{RealPath: "/path/branch.d/two/file-2.txt"}, Metadata: file.Metadata{ Path: "/path/branch.d/two/file-2.txt", - Type: file.TypeReg, + Type: file.TypeRegular, MIMEType: "text/plain", }, }, @@ -647,7 +647,7 @@ func TestFileCatalog_GetByMimeType(t *testing.T) { Reference: file.Reference{RealPath: "/path/file-3.txt"}, Metadata: file.Metadata{ Path: "/path/file-3.txt", - Type: file.TypeReg, + Type: file.TypeRegular, MIMEType: "text/plain", }, }, diff --git a/pkg/image/image.go b/pkg/image/image.go index b415ca41..9798197b 100644 --- a/pkg/image/image.go +++ b/pkg/image/image.go @@ -322,7 +322,7 @@ func (i *Image) FileContentsByRef(ref file.Reference) (io.ReadCloser, 
error) { // ResolveLinkByLayerSquash resolves a symlink or hardlink for the given file reference relative to the result from // the layer squash of the given layer index argument. // If the given file reference is not a link type, or is a unresolvable (dead) link, then the given file reference is returned. -func (i *Image) ResolveLinkByLayerSquash(ref file.Reference, layer int, options ...filetree.LinkResolutionOption) (*file.ReferenceAccessVia, error) { +func (i *Image) ResolveLinkByLayerSquash(ref file.Reference, layer int, options ...filetree.LinkResolutionOption) (*file.Resolution, error) { allOptions := append([]filetree.LinkResolutionOption{filetree.FollowBasenameLinks}, options...) _, resolvedRef, err := i.Layers[layer].SquashedTree.File(ref.RealPath, allOptions...) return resolvedRef, err @@ -330,7 +330,7 @@ func (i *Image) ResolveLinkByLayerSquash(ref file.Reference, layer int, options // ResolveLinkByImageSquash resolves a symlink or hardlink for the given file reference relative to the result from the image squash. // If the given file reference is not a link type, or is a unresolvable (dead) link, then the given file reference is returned. -func (i *Image) ResolveLinkByImageSquash(ref file.Reference, options ...filetree.LinkResolutionOption) (*file.ReferenceAccessVia, error) { +func (i *Image) ResolveLinkByImageSquash(ref file.Reference, options ...filetree.LinkResolutionOption) (*file.Resolution, error) { allOptions := append([]filetree.LinkResolutionOption{filetree.FollowBasenameLinks}, options...) _, resolvedRef, err := i.Layers[len(i.Layers)-1].SquashedTree.File(ref.RealPath, allOptions...) 
return resolvedRef, err From 386c9452f15073e4f989565ef0b66d107e28b5f9 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Tue, 7 Feb 2023 11:34:40 -0500 Subject: [PATCH 29/35] ensure that glob results match search facade Signed-off-by: Alex Goodman --- pkg/filetree/filetree.go | 3 +++ pkg/filetree/search_test.go | 10 +++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/pkg/filetree/filetree.go b/pkg/filetree/filetree.go index 8c5e91cd..c95883e5 100644 --- a/pkg/filetree/filetree.go +++ b/pkg/filetree/filetree.go @@ -4,6 +4,7 @@ import ( "errors" "fmt" "path" + "sort" "strings" "github.com/scylladb/go-set/iset" @@ -465,6 +466,8 @@ func (t *FileTree) FilesByGlob(query string, options ...LinkResolutionOption) ([ } } + sort.Sort(file.Resolutions(results)) + return results, nil } diff --git a/pkg/filetree/search_test.go b/pkg/filetree/search_test.go index 82d6957c..6033646c 100644 --- a/pkg/filetree/search_test.go +++ b/pkg/filetree/search_test.go @@ -364,6 +364,13 @@ func Test_searchContext_SearchByGlob(t *testing.T) { if d := cmp.Diff(tt.want, got, opts...); d != "" { t.Errorf("SearchByGlob() mismatch (-want +got):\n%s", d) } + + expected, err := tt.fields.tree.FilesByGlob(tt.args.glob, tt.args.options...) 
+ require.NoError(t, err) + + if d := cmp.Diff(expected, got, opts...); d != "" { + t.Errorf("Difference relative to tree results mismatch (-want +got):\n%s", d) + } }) } } @@ -1001,7 +1008,8 @@ func Test_searchContext_allPathsToNode(t *testing.T) { if err != nil { return } - assert.ElementsMatchf(t, tt.want, got, cmp.Diff(tt.want, got)) + + assert.ElementsMatchf(t, tt.want, got, cmp.Diff(tt.want, got), "expected and actual paths should match") }) } } From 307ac9b042abc1fa557e19b0f980896aaec944a9 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Tue, 7 Feb 2023 18:52:18 -0500 Subject: [PATCH 30/35] replace stringset implementation + move resolution tests Signed-off-by: Alex Goodman --- pkg/file/reference_test.go | 279 ---------------------------------- pkg/file/resolution.go | 4 +- pkg/file/resolution_test.go | 275 ++++++++++++++++++++++++++++++++- pkg/filetree/filetree.go | 14 +- pkg/filetree/filetree_test.go | 10 +- pkg/filetree/search.go | 2 +- 6 files changed, 289 insertions(+), 295 deletions(-) delete mode 100644 pkg/file/reference_test.go diff --git a/pkg/file/reference_test.go b/pkg/file/reference_test.go deleted file mode 100644 index 9782f67d..00000000 --- a/pkg/file/reference_test.go +++ /dev/null @@ -1,279 +0,0 @@ -package file - -import ( - "github.com/stretchr/testify/assert" - "testing" -) - -func TestResolution_RequestResolutionPath(t *testing.T) { - tests := []struct { - name string - subject Resolution - want []Path - }{ - { - name: "empty", - subject: Resolution{ - LinkResolutions: nil, - }, - want: nil, - }, - { - name: "single ref", - subject: Resolution{ - RequestPath: "/home/wagoodman/file.txt", - Reference: &Reference{ - id: 1, - RealPath: "/home/wagoodman/file.txt", - }, - LinkResolutions: nil, - }, - want: []Path{ - "/home/wagoodman/file.txt", - }, - }, - { - // /home -> /another/place - name: "ref with 1 leaf link resolutions", - subject: Resolution{ - RequestPath: "/home", - Reference: &Reference{RealPath: "/another/place"}, - 
LinkResolutions: []Resolution{ - { - RequestPath: "/home", - Reference: &Reference{RealPath: "/home"}, - }, - }, - }, - want: []Path{ - "/home", - "/another/place", - }, - }, - { - // /home/wagoodman/file.txt -> /place/wagoodman/file.txt -> /1/file.txt -> /2/real-file.txt - - // this is the current state of the filetree - // . - // ├── 1 - // │ ├── file.txt -> 2/real-file.txt - // │ └── link-to-place -> place - // ├── 2 - // │ └── real-file.txt - // ├── home -> link-to-1/link-to-place - // ├── link-to-1 -> 1 - // └── place - // └── wagoodman - // └── file.txt -> link-to-1/file.txt - - name: "ref with 2 leaf link resolutions", - subject: Resolution{ - RequestPath: "/home/wagoodman/file.txt", - Reference: &Reference{RealPath: "/2/real-file.txt"}, - LinkResolutions: []Resolution{ - { - RequestPath: "/place/wagoodman/file.txt", - Reference: &Reference{RealPath: "/place/wagoodman/file.txt"}, - }, - { - RequestPath: "/1/file.txt", - Reference: &Reference{RealPath: "/1/file.txt"}, - }, - }, - }, - want: []Path{ - "/home/wagoodman/file.txt", // request - "/place/wagoodman/file.txt", // real intermediate path - "/1/file.txt", // real intermediate path - "/2/real-file.txt", // final resolved path on the reference - }, - }, - { - // /home/wagoodman/file.txt -> /place/wagoodman/file.txt -> /1/file.txt -> /2/real-file.txt - - // this is the current state of the filetree - // . 
- // ├── 1 - // │ ├── file.txt -> 2/real-file.txt - // │ └── link-to-place -> place - // ├── home -> link-to-1/link-to-place - // ├── link-to-1 -> 1 - // └── place - // └── wagoodman - // └── file.txt -> link-to-1/file.txt - - name: "ref with dead link", - subject: Resolution{ - RequestPath: "/home/wagoodman/file.txt", - // note: this falls back to the last path that exists which is the behavior for link resolution options: - // []LinkResolutionOption{FollowBasenameLinks, DoNotFollowDeadBasenameLinks} - Reference: &Reference{RealPath: "/1/file.txt"}, - LinkResolutions: []Resolution{ - { - RequestPath: "/place/wagoodman/file.txt", - Reference: &Reference{RealPath: "/place/wagoodman/file.txt"}, - }, - { - RequestPath: "/1/file.txt", - Reference: &Reference{RealPath: "/1/file.txt"}, - }, - { - RequestPath: "/2/real-file.txt", - // nope! it's dead! - //Reference: &file.Reference{RealPath: "/2/real-file.txt"}, - }, - }, - }, - want: []Path{ - "/home/wagoodman/file.txt", // request - "/place/wagoodman/file.txt", // real intermediate path - "/1/file.txt", // real intermediate path - "/2/real-file.txt", // final resolved path on the reference (that does not exist) - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - assert.Equalf(t, tt.want, tt.subject.RequestResolutionPath(), "RequestResolutionPath()") - }) - } -} - -func TestReferenceResolutionVia_ResolutionReferences(t *testing.T) { - type fields struct { - ReferenceResolution Resolution - LeafLinkResolution []Resolution - } - tests := []struct { - name string - subject Resolution - want []Reference - }{ - { - name: "empty", - subject: Resolution{ - LinkResolutions: nil, - }, - want: nil, - }, - { - name: "single ref", - subject: Resolution{ - RequestPath: "/home/wagoodman/file.txt", - Reference: &Reference{ - id: 1, - RealPath: "/home/wagoodman/file.txt", - }, - LinkResolutions: nil, - }, - want: []Reference{ - { - id: 1, - RealPath: "/home/wagoodman/file.txt", - }, - }, - }, - { - // 
/home -> /another/place - name: "ref with 1 leaf link resolutions", - subject: Resolution{ - RequestPath: "/home", - Reference: &Reference{RealPath: "/another/place"}, - LinkResolutions: []Resolution{ - { - RequestPath: "/home", - Reference: &Reference{RealPath: "/home"}, - }, - }, - }, - want: []Reference{ - {RealPath: "/home"}, - {RealPath: "/another/place"}, - }, - }, - { - // /home/wagoodman/file.txt -> /place/wagoodman/file.txt -> /1/file.txt -> /2/real-file.txt - - // this is the current state of the filetree - // . - // ├── 1 - // │ ├── file.txt -> 2/real-file.txt - // │ └── link-to-place -> place - // ├── 2 - // │ └── real-file.txt - // ├── home -> link-to-1/link-to-place - // ├── link-to-1 -> 1 - // └── place - // └── wagoodman - // └── file.txt -> link-to-1/file.txt - - name: "ref with 2 leaf link resolutions", - subject: Resolution{ - RequestPath: "/home/wagoodman/file.txt", - Reference: &Reference{RealPath: "/2/real-file.txt"}, - LinkResolutions: []Resolution{ - { - RequestPath: "/place/wagoodman/file.txt", - Reference: &Reference{RealPath: "/place/wagoodman/file.txt"}, - }, - { - RequestPath: "/1/file.txt", - Reference: &Reference{RealPath: "/1/file.txt"}, - }, - }, - }, - want: []Reference{ - {RealPath: "/place/wagoodman/file.txt"}, - {RealPath: "/1/file.txt"}, - {RealPath: "/2/real-file.txt"}, - }, - }, - { - // /home/wagoodman/file.txt -> /place/wagoodman/file.txt -> /1/file.txt -> /2/real-file.txt - - // this is the current state of the filetree - // . 
- // ├── 1 - // │ ├── file.txt -> 2/real-file.txt - // │ └── link-to-place -> place - // ├── home -> link-to-1/link-to-place - // ├── link-to-1 -> 1 - // └── place - // └── wagoodman - // └── file.txt -> link-to-1/file.txt - - name: "ref with dead link", - subject: Resolution{ - RequestPath: "/home/wagoodman/file.txt", - // note: this falls back to the last path that exists which is the behavior for link resolution options: - // []LinkResolutionOption{FollowBasenameLinks, DoNotFollowDeadBasenameLinks} - Reference: &Reference{RealPath: "/1/file.txt"}, - LinkResolutions: []Resolution{ - { - RequestPath: "/place/wagoodman/file.txt", - Reference: &Reference{RealPath: "/place/wagoodman/file.txt"}, - }, - { - RequestPath: "/1/file.txt", - Reference: &Reference{RealPath: "/1/file.txt"}, - }, - { - RequestPath: "/2/real-file.txt", - // nope! it's dead! - //Reference: &file.Reference{RealPath: "/2/real-file.txt"}, - }, - }, - }, - want: []Reference{ - {RealPath: "/place/wagoodman/file.txt"}, - {RealPath: "/1/file.txt"}, - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - assert.Equalf(t, tt.want, tt.subject.ResolutionReferences(), "ResolutionReferences()") - - }) - } -} diff --git a/pkg/file/resolution.go b/pkg/file/resolution.go index 738ebd49..5f10c719 100644 --- a/pkg/file/resolution.go +++ b/pkg/file/resolution.go @@ -135,8 +135,8 @@ func (f *Resolution) RequestResolutionPath() []Path { return paths } -// ResolutionReferences represents the traversal through the filesystem to access to current reference, including all symlink and hardlink resolution. -func (f *Resolution) ResolutionReferences() []Reference { +// References represents the traversal through the filesystem to access to current reference, including all symlink and hardlink resolution. 
+func (f *Resolution) References() []Reference { var refs []Reference var lastLinkResolutionIsDead bool diff --git a/pkg/file/resolution_test.go b/pkg/file/resolution_test.go index 45a7e994..27692eb9 100644 --- a/pkg/file/resolution_test.go +++ b/pkg/file/resolution_test.go @@ -6,7 +6,7 @@ import ( "testing" ) -func TestReferenceResolutionVias_Less(t *testing.T) { +func TestResolution_Less(t *testing.T) { realA := Resolution{ @@ -119,3 +119,276 @@ func TestReferenceResolutionVias_Less(t *testing.T) { }) } } + +func TestResolution_RequestResolutionPath(t *testing.T) { + tests := []struct { + name string + subject Resolution + want []Path + }{ + { + name: "empty", + subject: Resolution{ + LinkResolutions: nil, + }, + want: nil, + }, + { + name: "single ref", + subject: Resolution{ + RequestPath: "/home/wagoodman/file.txt", + Reference: &Reference{ + id: 1, + RealPath: "/home/wagoodman/file.txt", + }, + LinkResolutions: nil, + }, + want: []Path{ + "/home/wagoodman/file.txt", + }, + }, + { + // /home -> /another/place + name: "ref with 1 leaf link resolutions", + subject: Resolution{ + RequestPath: "/home", + Reference: &Reference{RealPath: "/another/place"}, + LinkResolutions: []Resolution{ + { + RequestPath: "/home", + Reference: &Reference{RealPath: "/home"}, + }, + }, + }, + want: []Path{ + "/home", + "/another/place", + }, + }, + { + // /home/wagoodman/file.txt -> /place/wagoodman/file.txt -> /1/file.txt -> /2/real-file.txt + + // this is the current state of the filetree + // . 
+ // ├── 1 + // │ ├── file.txt -> 2/real-file.txt + // │ └── link-to-place -> place + // ├── 2 + // │ └── real-file.txt + // ├── home -> link-to-1/link-to-place + // ├── link-to-1 -> 1 + // └── place + // └── wagoodman + // └── file.txt -> link-to-1/file.txt + + name: "ref with 2 leaf link resolutions", + subject: Resolution{ + RequestPath: "/home/wagoodman/file.txt", + Reference: &Reference{RealPath: "/2/real-file.txt"}, + LinkResolutions: []Resolution{ + { + RequestPath: "/place/wagoodman/file.txt", + Reference: &Reference{RealPath: "/place/wagoodman/file.txt"}, + }, + { + RequestPath: "/1/file.txt", + Reference: &Reference{RealPath: "/1/file.txt"}, + }, + }, + }, + want: []Path{ + "/home/wagoodman/file.txt", // request + "/place/wagoodman/file.txt", // real intermediate path + "/1/file.txt", // real intermediate path + "/2/real-file.txt", // final resolved path on the reference + }, + }, + { + // /home/wagoodman/file.txt -> /place/wagoodman/file.txt -> /1/file.txt -> /2/real-file.txt + + // this is the current state of the filetree + // . + // ├── 1 + // │ ├── file.txt -> 2/real-file.txt + // │ └── link-to-place -> place + // ├── home -> link-to-1/link-to-place + // ├── link-to-1 -> 1 + // └── place + // └── wagoodman + // └── file.txt -> link-to-1/file.txt + + name: "ref with dead link", + subject: Resolution{ + RequestPath: "/home/wagoodman/file.txt", + // note: this falls back to the last path that exists which is the behavior for link resolution options: + // []LinkResolutionOption{FollowBasenameLinks, DoNotFollowDeadBasenameLinks} + Reference: &Reference{RealPath: "/1/file.txt"}, + LinkResolutions: []Resolution{ + { + RequestPath: "/place/wagoodman/file.txt", + Reference: &Reference{RealPath: "/place/wagoodman/file.txt"}, + }, + { + RequestPath: "/1/file.txt", + Reference: &Reference{RealPath: "/1/file.txt"}, + }, + { + RequestPath: "/2/real-file.txt", + // nope! it's dead! 
+ //Reference: &file.Reference{RealPath: "/2/real-file.txt"}, + }, + }, + }, + want: []Path{ + "/home/wagoodman/file.txt", // request + "/place/wagoodman/file.txt", // real intermediate path + "/1/file.txt", // real intermediate path + "/2/real-file.txt", // final resolved path on the reference (that does not exist) + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equalf(t, tt.want, tt.subject.RequestResolutionPath(), "RequestResolutionPath()") + }) + } +} + +func TestResolution_References(t *testing.T) { + type fields struct { + ReferenceResolution Resolution + LeafLinkResolution []Resolution + } + tests := []struct { + name string + subject Resolution + want []Reference + }{ + { + name: "empty", + subject: Resolution{ + LinkResolutions: nil, + }, + want: nil, + }, + { + name: "single ref", + subject: Resolution{ + RequestPath: "/home/wagoodman/file.txt", + Reference: &Reference{ + id: 1, + RealPath: "/home/wagoodman/file.txt", + }, + LinkResolutions: nil, + }, + want: []Reference{ + { + id: 1, + RealPath: "/home/wagoodman/file.txt", + }, + }, + }, + { + // /home -> /another/place + name: "ref with 1 leaf link resolutions", + subject: Resolution{ + RequestPath: "/home", + Reference: &Reference{RealPath: "/another/place"}, + LinkResolutions: []Resolution{ + { + RequestPath: "/home", + Reference: &Reference{RealPath: "/home"}, + }, + }, + }, + want: []Reference{ + {RealPath: "/home"}, + {RealPath: "/another/place"}, + }, + }, + { + // /home/wagoodman/file.txt -> /place/wagoodman/file.txt -> /1/file.txt -> /2/real-file.txt + + // this is the current state of the filetree + // . 
+ // ├── 1 + // │ ├── file.txt -> 2/real-file.txt + // │ └── link-to-place -> place + // ├── 2 + // │ └── real-file.txt + // ├── home -> link-to-1/link-to-place + // ├── link-to-1 -> 1 + // └── place + // └── wagoodman + // └── file.txt -> link-to-1/file.txt + + name: "ref with 2 leaf link resolutions", + subject: Resolution{ + RequestPath: "/home/wagoodman/file.txt", + Reference: &Reference{RealPath: "/2/real-file.txt"}, + LinkResolutions: []Resolution{ + { + RequestPath: "/place/wagoodman/file.txt", + Reference: &Reference{RealPath: "/place/wagoodman/file.txt"}, + }, + { + RequestPath: "/1/file.txt", + Reference: &Reference{RealPath: "/1/file.txt"}, + }, + }, + }, + want: []Reference{ + {RealPath: "/place/wagoodman/file.txt"}, + {RealPath: "/1/file.txt"}, + {RealPath: "/2/real-file.txt"}, + }, + }, + { + // /home/wagoodman/file.txt -> /place/wagoodman/file.txt -> /1/file.txt -> /2/real-file.txt + + // this is the current state of the filetree + // . + // ├── 1 + // │ ├── file.txt -> 2/real-file.txt + // │ └── link-to-place -> place + // ├── home -> link-to-1/link-to-place + // ├── link-to-1 -> 1 + // └── place + // └── wagoodman + // └── file.txt -> link-to-1/file.txt + + name: "ref with dead link", + subject: Resolution{ + RequestPath: "/home/wagoodman/file.txt", + // note: this falls back to the last path that exists which is the behavior for link resolution options: + // []LinkResolutionOption{FollowBasenameLinks, DoNotFollowDeadBasenameLinks} + Reference: &Reference{RealPath: "/1/file.txt"}, + LinkResolutions: []Resolution{ + { + RequestPath: "/place/wagoodman/file.txt", + Reference: &Reference{RealPath: "/place/wagoodman/file.txt"}, + }, + { + RequestPath: "/1/file.txt", + Reference: &Reference{RealPath: "/1/file.txt"}, + }, + { + RequestPath: "/2/real-file.txt", + // nope! it's dead! 
+ //Reference: &file.Reference{RealPath: "/2/real-file.txt"}, + }, + }, + }, + want: []Reference{ + {RealPath: "/place/wagoodman/file.txt"}, + {RealPath: "/1/file.txt"}, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equalf(t, tt.want, tt.subject.References(), "References()") + + }) + } +} diff --git a/pkg/filetree/filetree.go b/pkg/filetree/filetree.go index c95883e5..4d87949e 100644 --- a/pkg/filetree/filetree.go +++ b/pkg/filetree/filetree.go @@ -3,13 +3,13 @@ package filetree import ( "errors" "fmt" + "github.com/scylladb/go-set/strset" "path" "sort" "strings" "github.com/scylladb/go-set/iset" - "github.com/anchore/stereoscope/internal" "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/stereoscope/pkg/filetree/filenode" "github.com/anchore/stereoscope/pkg/tree" @@ -342,7 +342,7 @@ func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, currentNodeAccess := n // keep resolving links until a regular file or directory is found - alreadySeen := internal.NewStringSet() + alreadySeen := strset.New() var err error for { nodePath = append(nodePath, *currentNodeAccess) @@ -356,7 +356,7 @@ func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, break } - if alreadySeen.Contains(string(currentNodeAccess.FileNode.RealPath)) { + if alreadySeen.Has(string(currentNodeAccess.FileNode.RealPath)) { return nil, ErrLinkCycleDetected } @@ -714,24 +714,24 @@ func (t *FileTree) TreeReader() tree.Reader { // PathDiff shows the path differences between two trees (useful for testing) func (t *FileTree) PathDiff(other *FileTree) (extra, missing []file.Path) { - ourPaths := internal.NewStringSet() + ourPaths := strset.New() for _, fn := range t.tree.Nodes() { ourPaths.Add(string(fn.ID())) } - theirPaths := internal.NewStringSet() + theirPaths := strset.New() for _, fn := range other.tree.Nodes() { theirPaths.Add(string(fn.ID())) } for _, fn := range other.tree.Nodes() { - if 
!ourPaths.Contains(string(fn.ID())) { + if !ourPaths.Has(string(fn.ID())) { extra = append(extra, file.Path(fn.ID())) } } for _, fn := range t.tree.Nodes() { - if !theirPaths.Contains(string(fn.ID())) { + if !theirPaths.Has(string(fn.ID())) { missing = append(missing, file.Path(fn.ID())) } } diff --git a/pkg/filetree/filetree_test.go b/pkg/filetree/filetree_test.go index 44effa0e..e2d592cc 100644 --- a/pkg/filetree/filetree_test.go +++ b/pkg/filetree/filetree_test.go @@ -2,13 +2,13 @@ package filetree import ( "errors" + "github.com/scylladb/go-set/strset" "testing" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" "github.com/stretchr/testify/require" - "github.com/anchore/stereoscope/internal" "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/stereoscope/pkg/filetree/filenode" "github.com/stretchr/testify/assert" @@ -335,8 +335,8 @@ func TestFileTree_FilesByGlob(t *testing.T) { return } - actualSet := internal.NewStringSet() - expectedSet := internal.NewStringSet() + actualSet := strset.New() + expectedSet := strset.New() for _, r := range actual { actualSet.Add(string(r.RequestPath)) @@ -344,13 +344,13 @@ func TestFileTree_FilesByGlob(t *testing.T) { for _, e := range test.expected { expectedSet.Add(e) - if !actualSet.Contains(e) { + if !actualSet.Has(e) { t.Errorf("missing search hit: %s", e) } } for _, r := range actual { - if !expectedSet.Contains(string(r.RequestPath)) { + if !expectedSet.Has(string(r.RequestPath)) { t.Errorf("extra search hit: %+v", r) } } diff --git a/pkg/filetree/search.go b/pkg/filetree/search.go index 0e35cb30..e5152ab7 100644 --- a/pkg/filetree/search.go +++ b/pkg/filetree/search.go @@ -465,7 +465,7 @@ func (sc searchContext) referencesInTree(fileEntries []IndexEntry) ([]file.Resol } for _, ref := range expandedRefs { - for _, accessRef := range ref.ResolutionReferences() { + for _, accessRef := range ref.References() { if accessRef.ID() == entry.Reference.ID() { // we know this entry exists in the tree, 
keep track of the reference for this file refs = append(refs, ref) From 4f28a64f59b4b3a14c1c7cde85d7e11083bbfa9a Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Tue, 7 Feb 2023 18:54:45 -0500 Subject: [PATCH 31/35] add note about podman dependency for testing Signed-off-by: Alex Goodman --- DEVELOPING.md | 1 + 1 file changed, 1 insertion(+) diff --git a/DEVELOPING.md b/DEVELOPING.md index daa5551d..43577bb2 100644 --- a/DEVELOPING.md +++ b/DEVELOPING.md @@ -6,6 +6,7 @@ In order to test and develop in this repo you will need the following dependenci - Golang - docker - make +- podman (for benchmark and integration tests only) After cloning the following step can help you get setup: 1. run `make bootstrap` to download go mod dependencies, create the `/.tmp` dir, and download helper utilities. From db2e56d712fd5ebc3977b7440d81ca2ab7a5d4b9 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Tue, 7 Feb 2023 19:06:33 -0500 Subject: [PATCH 32/35] address PR comments Signed-off-by: Alex Goodman --- pkg/file/resolution_test.go | 3 --- pkg/filetree/builder.go | 6 ++++-- pkg/filetree/depth_first_path_walker.go | 8 +++++--- pkg/filetree/filetree.go | 3 ++- test/integration/fixture_image_simple_test.go | 10 +++++----- 5 files changed, 16 insertions(+), 14 deletions(-) diff --git a/pkg/file/resolution_test.go b/pkg/file/resolution_test.go index 27692eb9..3a444950 100644 --- a/pkg/file/resolution_test.go +++ b/pkg/file/resolution_test.go @@ -9,7 +9,6 @@ import ( func TestResolution_Less(t *testing.T) { realA := Resolution{ - RequestPath: "/parent/a", Reference: &Reference{ RealPath: "/parent/a", @@ -17,7 +16,6 @@ func TestResolution_Less(t *testing.T) { } realB := Resolution{ - RequestPath: "/parent/b", Reference: &Reference{ RealPath: "/parent/b", @@ -25,7 +23,6 @@ func TestResolution_Less(t *testing.T) { } linkToA := Resolution{ - RequestPath: "/parent-link/a", Reference: &Reference{ RealPath: "/a", diff --git a/pkg/filetree/builder.go b/pkg/filetree/builder.go index 
0cfbb86f..1f017f8e 100644 --- a/pkg/filetree/builder.go +++ b/pkg/filetree/builder.go @@ -20,8 +20,10 @@ func NewBuilder(tree Writer, index IndexWriter) *Builder { } func (b *Builder) Add(metadata file.Metadata) (*file.Reference, error) { - var ref *file.Reference - var err error + var ( + ref *file.Reference + err error + ) switch metadata.Type { case file.TypeSymLink: ref, err = b.tree.AddSymLink(file.Path(metadata.Path), file.Path(metadata.LinkDestination)) diff --git a/pkg/filetree/depth_first_path_walker.go b/pkg/filetree/depth_first_path_walker.go index 061bf84a..0cdc8822 100644 --- a/pkg/filetree/depth_first_path_walker.go +++ b/pkg/filetree/depth_first_path_walker.go @@ -58,9 +58,11 @@ func NewDepthFirstPathWalker(tree *FileTree, visitor FileNodeVisitor, conditions func (w *DepthFirstPathWalker) Walk(from file.Path) (file.Path, *filenode.FileNode, error) { w.pathStack.Push(from) - var currentPath file.Path - var currentNode *nodeAccess - var err error + var ( + currentPath file.Path + currentNode *nodeAccess + err error + ) for w.pathStack.Size() > 0 { currentPath = w.pathStack.Pop() diff --git a/pkg/filetree/filetree.go b/pkg/filetree/filetree.go index 4d87949e..9352e8d9 100644 --- a/pkg/filetree/filetree.go +++ b/pkg/filetree/filetree.go @@ -3,11 +3,12 @@ package filetree import ( "errors" "fmt" - "github.com/scylladb/go-set/strset" "path" "sort" "strings" + "github.com/scylladb/go-set/strset" + "github.com/scylladb/go-set/iset" "github.com/anchore/stereoscope/pkg/file" diff --git a/test/integration/fixture_image_simple_test.go b/test/integration/fixture_image_simple_test.go index 44260c84..2a378015 100644 --- a/test/integration/fixture_image_simple_test.go +++ b/test/integration/fixture_image_simple_test.go @@ -190,7 +190,7 @@ func BenchmarkSimpleImage_FetchSquashedContents(b *testing.B) { func assertImageSimpleMetadata(t *testing.T, i *image.Image, expectedValues testCase) { t.Helper() - //t.Log("Asserting metadata...") + t.Log("Asserting metadata...") 
if i.Metadata.MediaType != expectedValues.imageMediaType { t.Errorf("unexpected image media type: %+v", i.Metadata.MediaType) @@ -228,7 +228,7 @@ func assertImageSimpleMetadata(t *testing.T, i *image.Image, expectedValues test func assertImageSimpleSquashedTrees(t *testing.T, i *image.Image) { t.Helper() - //t.Log("Asserting squashed trees...") + t.Log("Asserting squashed trees...") one := filetree.New() one.AddFile("/somefile-1.txt") @@ -267,7 +267,7 @@ func assertImageSimpleSquashedTrees(t *testing.T, i *image.Image) { func assertImageSimpleTrees(t *testing.T, i *image.Image) { t.Helper() - //t.Log("Asserting trees...") + t.Log("Asserting trees...") one := filetree.New() one.AddFile("/somefile-1.txt") @@ -296,7 +296,7 @@ func assertImageSimpleTrees(t *testing.T, i *image.Image) { func assertImageSimpleContents(t *testing.T, i *image.Image) { t.Helper() - //t.Log("Asserting contents...") + t.Log("Asserting contents...") expectedContents := map[string]string{ "/somefile-1.txt": "this file has contents", @@ -306,7 +306,7 @@ func assertImageSimpleContents(t *testing.T, i *image.Image) { actualContents := make(map[string]io.Reader) for path := range expectedContents { - reader, err := i.FileContentsFromSquash(file.Path(path)) + reader, err := i.OpenPathFromSquash(file.Path(path)) if err != nil { t.Fatal("unable to fetch multiple contents", err) } From d54c328830361daef7293d855d63ce0dd58c705a Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Tue, 7 Feb 2023 19:45:06 -0500 Subject: [PATCH 33/35] remove extra whitespace Signed-off-by: Alex Goodman --- .github/actions/bootstrap/action.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/actions/bootstrap/action.yaml b/.github/actions/bootstrap/action.yaml index 5544402d..d4ed5970 100644 --- a/.github/actions/bootstrap/action.yaml +++ b/.github/actions/bootstrap/action.yaml @@ -78,4 +78,3 @@ runs: shell: bash run: | DEBIAN_FRONTEND=noninteractive sudo apt update && sudo -E apt install -y ${{ 
inputs.bootstrap-apt-packages }} - From 3153d538e454285c0004096890fae16ab919e3a8 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Tue, 7 Feb 2023 19:53:26 -0500 Subject: [PATCH 34/35] constrain OS build support Signed-off-by: Alex Goodman --- .github/scripts/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/scripts/build.sh b/.github/scripts/build.sh index b911da8a..50beb016 100755 --- a/.github/scripts/build.sh +++ b/.github/scripts/build.sh @@ -24,7 +24,7 @@ FLAGS=${FLAGS:-"-ldflags=\"-s -w\""} # A list of OSes and architectures to not build for, space-separated # It can be set from the command line when the script is called. -NOT_ALLOWED_OS=${NOT_ALLOWED_OS:-"js android ios solaris illumos aix dragonfly plan9"} +NOT_ALLOWED_OS=${NOT_ALLOWED_OS:-"js android ios solaris illumos aix dragonfly plan9 freebsd openbsd netbsd"} NOT_ALLOWED_ARCH=${NOT_ALLOWED_ARCH:-"riscv64 mips mips64 mips64le ppc64 ppc64le s390x wasm"} From 90b1ac403002352690d690e89f5c1e6848205039 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Wed, 8 Feb 2023 10:36:18 -0500 Subject: [PATCH 35/35] update/remove TODO comments Signed-off-by: Alex Goodman --- pkg/file/metadata.go | 2 -- pkg/filetree/depth_first_path_walker.go | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/pkg/file/metadata.go b/pkg/file/metadata.go index 2a6358ff..990e7a4f 100644 --- a/pkg/file/metadata.go +++ b/pkg/file/metadata.go @@ -111,8 +111,6 @@ func NewMetadataFromPath(path string, info os.FileInfo) Metadata { mimeType = MIMEType(f) } - // TODO: should we clean up path to be the real, absolute path? 
- return Metadata{ Path: path, Mode: info.Mode(), diff --git a/pkg/filetree/depth_first_path_walker.go b/pkg/filetree/depth_first_path_walker.go index 0cdc8822..71d3d7f4 100644 --- a/pkg/filetree/depth_first_path_walker.go +++ b/pkg/filetree/depth_first_path_walker.go @@ -66,7 +66,7 @@ func (w *DepthFirstPathWalker) Walk(from file.Path) (file.Path, *filenode.FileNo for w.pathStack.Size() > 0 { currentPath = w.pathStack.Pop() - // TODO: should we make these link resolutions configurable so you can observe the links on walk as well? + // TODO: should we make these link resolutions configurable so you can observe the links on walk as well? (take link resolution options as a parameter) currentNode, err = w.tree.node(currentPath, linkResolutionStrategy{ FollowAncestorLinks: true, FollowBasenameLinks: true,