From f3c5c79c7456cfc918c368de09383bc05602ba51 Mon Sep 17 00:00:00 2001 From: Christopher Phillips Date: Tue, 21 Feb 2023 11:04:30 -0500 Subject: [PATCH 01/14] test: add failing test for cycle case Signed-off-by: Christopher Phillips --- pkg/filetree/filetree_test.go | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/pkg/filetree/filetree_test.go b/pkg/filetree/filetree_test.go index e2d592cc..2d822175 100644 --- a/pkg/filetree/filetree_test.go +++ b/pkg/filetree/filetree_test.go @@ -1079,6 +1079,7 @@ func TestFileTree_File_DeadCycleDetection(t *testing.T) { // the test.... do we stop when a cycle is detected? exists, _, err := tr.File("/somewhere/acorn", FollowBasenameLinks) + // TODO: check this case if err != ErrLinkCycleDetected { t.Fatalf("should have gotten an error on resolving a file") } @@ -1089,6 +1090,39 @@ func TestFileTree_File_DeadCycleDetection(t *testing.T) { } +func TestFileTree_File_DeadCycleDetection_BAD(t *testing.T) { + tr := New() + _, err := tr.AddFile("/usr/bin/ksh93") + require.NoError(t, err) + _, err = tr.AddSymLink("/bin", "/usr/bin") + require.NoError(t, err) + _, err = tr.AddSymLink("/usr/bin/ksh", "/etc/alternatives/ksh") + require.NoError(t, err) + _, err = tr.AddSymLink("/etc/alternatives/ksh", "/bin/ksh93") + require.NoError(t, err) + _, err = tr.AddSymLink("/usr/local/bin/ksh", "/bin/ksh") + require.NoError(t, err) + + /* + /usr/bin/ksh93 <-- Real file + /bin -> /usr/bin + /usr/bin/ksh -> /etc/alternatives/ksh + /etc/alternatives/ksh -> /bin/ksh93 + */ + + // ls -al /usr/local/bin/ksh + // /usr/local/bin/ksh -> /bin/ksh + // ls -al /bin/ksh + // /bin/ksh -> /etc/alternatives/ksh + // /etc/alternatives/ksh + // /etc/alternatives/ksh -> /bin/ksh93 + // /bin/ksh93 + + // the test.... do we stop when a cycle is detected? + _, _, err = tr.File("/usr/local/bin/ksh", FollowBasenameLinks) + require.NoError(t, err) +} + func TestFileTree_AllFiles(t *testing.T) { tr := New() From 3526f2120b02e2d92cfa507f7982a45d263a78f5 Mon Sep 17 00:00:00 2001 From: Christopher Phillips Date: Tue, 21 Feb 2023 14:45:29 -0500 Subject: [PATCH 02/14] test: test updates sym links Signed-off-by: Christopher Phillips --- pkg/filetree/filenode/filenode.go | 1 + pkg/filetree/filetree.go | 8 +++++++- pkg/filetree/filetree_test.go | 13 ++++++++++--- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/pkg/filetree/filenode/filenode.go b/pkg/filetree/filenode/filenode.go index 14f0fe7a..f7811ea1 100644 --- a/pkg/filetree/filenode/filenode.go +++ b/pkg/filetree/filenode/filenode.go @@ -77,6 +77,7 @@ func (n *FileNode) RenderLinkDestination() file.Path { return "" } + // TODO: do we want to assert on if a destination exists or not here? if n.LinkPath.IsAbsolutePath() { // use links with absolute paths blindly return n.LinkPath diff --git a/pkg/filetree/filetree.go b/pkg/filetree/filetree.go index 9352e8d9..1ea3e744 100644 --- a/pkg/filetree/filetree.go +++ b/pkg/filetree/filetree.go @@ -250,6 +250,9 @@ func (t *FileTree) node(p file.Path, strategy linkResolutionStrategy) (*nodeAcce // return FileNode of the basename in the given path (no resolution is done at or past the basename). Note: it is // assumed that the given path has already been normalized. +// TODO: don't record what was visited in attempted paths, record the request path before the call +// TODO: memoize the attempted paths where we remove paths that are resolve so they can be seen again +// TODO: add a second set which is visited links which is also checked for attempted paths func (t *FileTree) resolveAncestorLinks(path file.Path, attemptedPaths file.PathSet) (*nodeAccess, error) { // performance optimization... see if there is a node at the path (as if it is a real path). If so, // use it, otherwise, continue with ancestor resolution @@ -368,6 +371,7 @@ func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, } // prepare for the next iteration + // already seen is important for the context of this loop alreadySeen.Add(string(currentNodeAccess.FileNode.RealPath)) nextPath = currentNodeAccess.FileNode.RenderLinkDestination() @@ -381,11 +385,13 @@ func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, lastNode = currentNodeAccess // break any cycles with non-existent paths (before attempting to look the path up again) + // TODO: DOUBT if attemptedPaths.Contains(nextPath) { return nil, ErrLinkCycleDetected } - // get the next Node (based on the next path) + // get the next Node (based on the next path)a + // attempted paths maintains state across calls to resolveAncestorLinks attemptedPaths.Add(nextPath) currentNodeAccess, err = t.resolveAncestorLinks(nextPath, attemptedPaths) if err != nil { diff --git a/pkg/filetree/filetree_test.go b/pkg/filetree/filetree_test.go index 2d822175..f6ab48dd 100644 --- a/pkg/filetree/filetree_test.go +++ b/pkg/filetree/filetree_test.go @@ -1079,6 +1079,7 @@ func TestFileTree_File_DeadCycleDetection(t *testing.T) { // the test.... do we stop when a cycle is detected? exists, _, err := tr.File("/somewhere/acorn", FollowBasenameLinks) + require.Error(t, err, "should have gotten an error on resolution of a dead cycle") // TODO: check this case if err != ErrLinkCycleDetected { t.Fatalf("should have gotten an error on resolving a file") @@ -1094,13 +1095,17 @@ func TestFileTree_File_DeadCycleDetection_BAD(t *testing.T) { tr := New() _, err := tr.AddFile("/usr/bin/ksh93") require.NoError(t, err) - _, err = tr.AddSymLink("/bin", "/usr/bin") + + _, err = tr.AddSymLink("/usr/local/bin/ksh", "/bin/ksh") require.NoError(t, err) - _, err = tr.AddSymLink("/usr/bin/ksh", "/etc/alternatives/ksh") + + _, err = tr.AddSymLink("/bin", "/usr/bin") require.NoError(t, err) + _, err = tr.AddSymLink("/etc/alternatives/ksh", "/bin/ksh93") require.NoError(t, err) - _, err = tr.AddSymLink("/usr/local/bin/ksh", "/bin/ksh") + + _, err = tr.AddSymLink("/usr/bin/ksh", "/etc/alternatives/ksh") require.NoError(t, err) /* @@ -1110,6 +1115,8 @@ func TestFileTree_File_DeadCycleDetection_BAD(t *testing.T) { /etc/alternatives/ksh -> /bin/ksh93 */ + // + // ls -al /usr/local/bin/ksh // /usr/local/bin/ksh -> /bin/ksh // ls -al /bin/ksh From 74bead221782604eaf1d76e7fe382c11d71281b4 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Tue, 21 Feb 2023 15:07:52 -0500 Subject: [PATCH 03/14] change the filetree recursive pathset to represent open calls Signed-off-by: Alex Goodman --- pkg/filetree/filetree.go | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/pkg/filetree/filetree.go b/pkg/filetree/filetree.go index 1ea3e744..a07fa2b2 100644 --- a/pkg/filetree/filetree.go +++ b/pkg/filetree/filetree.go @@ -250,10 +250,7 @@ func (t *FileTree) node(p file.Path, strategy linkResolutionStrategy) (*nodeAcce // return FileNode of the basename in the given path (no resolution is done at or past the basename). Note: it is // assumed that the given path has already been normalized. -// TODO: don't record what was visited in attempted paths, record the request path before the call -// TODO: memoize the attempted paths where we remove paths that are resolve so they can be seen again -// TODO: add a second set which is visited links which is also checked for attempted paths -func (t *FileTree) resolveAncestorLinks(path file.Path, attemptedPaths file.PathSet) (*nodeAccess, error) { +func (t *FileTree) resolveAncestorLinks(path file.Path, attemptingPaths file.PathSet) (*nodeAccess, error) { // performance optimization... see if there is a node at the path (as if it is a real path). If so, // use it, otherwise, continue with ancestor resolution currentNodeAccess, err := t.node(path, linkResolutionStrategy{}) @@ -309,7 +306,7 @@ func (t *FileTree) resolveAncestorLinks(path file.Path, attemptedPaths file.Path // links until the next Node is resolved (or not). isLastPart := idx == len(pathParts)-1 if !isLastPart && currentNodeAccess.FileNode.IsLink() { - currentNodeAccess, err = t.resolveNodeLinks(currentNodeAccess, true, attemptedPaths) + currentNodeAccess, err = t.resolveNodeLinks(currentNodeAccess, true, attemptingPaths) if err != nil { // only expected to happen on cycles return currentNodeAccess, err @@ -328,14 +325,16 @@ func (t *FileTree) resolveAncestorLinks(path file.Path, attemptedPaths file.Path // resolveNodeLinks takes the given FileNode and resolves all links at the base of the real path for the node (this implies // that NO ancestors are considered). // nolint: funlen -func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, attemptedPaths file.PathSet) (*nodeAccess, error) { +func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, attemptingPaths file.PathSet) (*nodeAccess, error) { if n == nil { return nil, fmt.Errorf("cannot resolve links with nil Node given") } - // we need to short-circuit link resolution that never resolves (cycles) due to a cycle referencing nodes that do not exist - if attemptedPaths == nil { - attemptedPaths = file.NewPathSet() + // we need to short-circuit link resolution that never resolves (cycles) due to a cycle referencing nodes that do not exist. + // this represents current link resolution requests that are in progress. This set is pruned once the resolution + // has been completed. + if attemptingPaths == nil { + attemptingPaths = file.NewPathSet() } // note: this assumes that callers are passing paths in which the constituent parts are NOT symlinks @@ -385,15 +384,14 @@ func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, lastNode = currentNodeAccess // break any cycles with non-existent paths (before attempting to look the path up again) - // TODO: DOUBT - if attemptedPaths.Contains(nextPath) { + if attemptingPaths.Contains(nextPath) { return nil, ErrLinkCycleDetected } // get the next Node (based on the next path)a // attempted paths maintains state across calls to resolveAncestorLinks - attemptedPaths.Add(nextPath) - currentNodeAccess, err = t.resolveAncestorLinks(nextPath, attemptedPaths) + attemptingPaths.Add(nextPath) + currentNodeAccess, err = t.resolveAncestorLinks(nextPath, attemptingPaths) if err != nil { if currentNodeAccess != nil { currentNodeAccess.LeafLinkResolution = append(currentNodeAccess.LeafLinkResolution, nodePath...) @@ -402,6 +400,7 @@ func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, // only expected to occur upon cycle detection return currentNodeAccess, err } + attemptingPaths.Remove(nextPath) } if !currentNodeAccess.HasFileNode() && !followDeadBasenameLinks { From c396e5a001fad5d432f9ecc18fb461cd877a6926 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Tue, 21 Feb 2023 15:35:16 -0500 Subject: [PATCH 04/14] add another cycle test Signed-off-by: Alex Goodman --- pkg/filetree/filetree.go | 28 ++++++++++--------- pkg/filetree/filetree_test.go | 51 +++++++++++++++++++++-------------- 2 files changed, 46 insertions(+), 33 deletions(-) diff --git a/pkg/filetree/filetree.go b/pkg/filetree/filetree.go index a07fa2b2..9eff948f 100644 --- a/pkg/filetree/filetree.go +++ b/pkg/filetree/filetree.go @@ -250,7 +250,7 @@ func (t *FileTree) node(p file.Path, strategy linkResolutionStrategy) (*nodeAcce // return FileNode of the basename in the given path (no resolution is done at or past the basename). Note: it is // assumed that the given path has already been normalized. -func (t *FileTree) resolveAncestorLinks(path file.Path, attemptingPaths file.PathSet) (*nodeAccess, error) { +func (t *FileTree) resolveAncestorLinks(path file.Path, currentlyResolvingLinkPaths file.PathSet) (*nodeAccess, error) { // performance optimization... see if there is a node at the path (as if it is a real path). If so, // use it, otherwise, continue with ancestor resolution currentNodeAccess, err := t.node(path, linkResolutionStrategy{}) @@ -306,7 +306,7 @@ func (t *FileTree) resolveAncestorLinks(path file.Path, attemptingPaths file.Pat // links until the next Node is resolved (or not). isLastPart := idx == len(pathParts)-1 if !isLastPart && currentNodeAccess.FileNode.IsLink() { - currentNodeAccess, err = t.resolveNodeLinks(currentNodeAccess, true, attemptingPaths) + currentNodeAccess, err = t.resolveNodeLinks(currentNodeAccess, true, currentlyResolvingLinkPaths) if err != nil { // only expected to happen on cycles return currentNodeAccess, err @@ -325,7 +325,7 @@ func (t *FileTree) resolveAncestorLinks(path file.Path, attemptingPaths file.Pat // resolveNodeLinks takes the given FileNode and resolves all links at the base of the real path for the node (this implies // that NO ancestors are considered). // nolint: funlen -func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, attemptingPaths file.PathSet) (*nodeAccess, error) { +func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, currentlyResolvingLinkPaths file.PathSet) (*nodeAccess, error) { if n == nil { return nil, fmt.Errorf("cannot resolve links with nil Node given") } @@ -333,8 +333,8 @@ func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, // we need to short-circuit link resolution that never resolves (cycles) due to a cycle referencing nodes that do not exist. // this represents current link resolution requests that are in progress. This set is pruned once the resolution // has been completed. - if attemptingPaths == nil { - attemptingPaths = file.NewPathSet() + if currentlyResolvingLinkPaths == nil { + currentlyResolvingLinkPaths = file.NewPathSet() } // note: this assumes that callers are passing paths in which the constituent parts are NOT symlinks @@ -344,8 +344,10 @@ func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, currentNodeAccess := n - // keep resolving links until a regular file or directory is found - alreadySeen := strset.New() + // keep resolving links until a regular file or directory is found. + // Note: this is NOT redundant relative to the 'currentlyResolvingLinkPaths' set. This set is used to short-circuit + // real paths that have been revisited through potentially different links (or really anyway). + realPathsVisited := strset.New() var err error for { nodePath = append(nodePath, *currentNodeAccess) @@ -359,7 +361,7 @@ func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, break } - if alreadySeen.Has(string(currentNodeAccess.FileNode.RealPath)) { + if realPathsVisited.Has(string(currentNodeAccess.FileNode.RealPath)) { return nil, ErrLinkCycleDetected } @@ -371,7 +373,7 @@ func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, // prepare for the next iteration // already seen is important for the context of this loop - alreadySeen.Add(string(currentNodeAccess.FileNode.RealPath)) + realPathsVisited.Add(string(currentNodeAccess.FileNode.RealPath)) nextPath = currentNodeAccess.FileNode.RenderLinkDestination() @@ -384,14 +386,14 @@ func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, lastNode = currentNodeAccess // break any cycles with non-existent paths (before attempting to look the path up again) - if attemptingPaths.Contains(nextPath) { + if currentlyResolvingLinkPaths.Contains(nextPath) { return nil, ErrLinkCycleDetected } // get the next Node (based on the next path)a // attempted paths maintains state across calls to resolveAncestorLinks - attemptingPaths.Add(nextPath) - currentNodeAccess, err = t.resolveAncestorLinks(nextPath, attemptingPaths) + currentlyResolvingLinkPaths.Add(nextPath) + currentNodeAccess, err = t.resolveAncestorLinks(nextPath, currentlyResolvingLinkPaths) if err != nil { if currentNodeAccess != nil { currentNodeAccess.LeafLinkResolution = append(currentNodeAccess.LeafLinkResolution, nodePath...) @@ -400,7 +402,7 @@ func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, // only expected to occur upon cycle detection return currentNodeAccess, err } - attemptingPaths.Remove(nextPath) + currentlyResolvingLinkPaths.Remove(nextPath) } if !currentNodeAccess.HasFileNode() && !followDeadBasenameLinks { diff --git a/pkg/filetree/filetree_test.go b/pkg/filetree/filetree_test.go index f6ab48dd..fff09810 100644 --- a/pkg/filetree/filetree_test.go +++ b/pkg/filetree/filetree_test.go @@ -1091,11 +1091,37 @@ func TestFileTree_File_DeadCycleDetection(t *testing.T) { } -func TestFileTree_File_DeadCycleDetection_BAD(t *testing.T) { +func TestFileTree_File_ShortCircuitDeadBasenameLinkCycles(t *testing.T) { tr := New() _, err := tr.AddFile("/usr/bin/ksh93") require.NoError(t, err) + linkPath, err := tr.AddSymLink("/usr/local/bin/ksh", "/bin/ksh") + require.NoError(t, err) + + _, err = tr.AddSymLink("/bin", "/usr/bin/ksh93") + require.NoError(t, err) + + // note: we follow dead basename links + exists, resolution, err := tr.File("/usr/local/bin/ksh", FollowBasenameLinks) + require.NoError(t, err) + assert.False(t, exists) + assert.False(t, resolution.HasReference()) + + // note: we don't follow dead basename links + exists, resolution, err = tr.File("/usr/local/bin/ksh", FollowBasenameLinks, DoNotFollowDeadBasenameLinks) + require.NoError(t, err) + assert.True(t, exists) + assert.True(t, resolution.HasReference()) + assert.Equal(t, *linkPath, *resolution.Reference) +} + +// regression: Syft issue https://github.com/anchore/syft/issues/1586 +func TestFileTree_File_ResolutionWithMultipleAncestorResolutionsForSameNode(t *testing.T) { + tr := New() + actualRef, err := tr.AddFile("/usr/bin/ksh93") + require.NoError(t, err) + _, err = tr.AddSymLink("/usr/local/bin/ksh", "/bin/ksh") require.NoError(t, err) @@ -1108,26 +1134,11 @@ func TestFileTree_File_DeadCycleDetection_BAD(t *testing.T) { _, err = tr.AddSymLink("/usr/bin/ksh", "/etc/alternatives/ksh") require.NoError(t, err) - /* - /usr/bin/ksh93 <-- Real file - /bin -> /usr/bin - /usr/bin/ksh -> /etc/alternatives/ksh - /etc/alternatives/ksh -> /bin/ksh93 - */ - - // - - // ls -al /usr/local/bin/ksh - // /usr/local/bin/ksh -> /bin/ksh - // ls -al /bin/ksh - // /bin/ksh -> /etc/alternatives/ksh - // /etc/alternatives/ksh - // /etc/alternatives/ksh -> /bin/ksh93 - // /bin/ksh93 - - // the test.... do we stop when a cycle is detected? - _, _, err = tr.File("/usr/local/bin/ksh", FollowBasenameLinks) + exists, resolution, err := tr.File("/usr/local/bin/ksh", FollowBasenameLinks) require.NoError(t, err) + assert.True(t, exists) + assert.True(t, resolution.HasReference()) + assert.Equal(t, *actualRef, *resolution.Reference) } func TestFileTree_AllFiles(t *testing.T) { From 33b9c974130a59905f367a6cd7c2317c15fb4255 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Tue, 21 Feb 2023 17:05:50 -0500 Subject: [PATCH 05/14] change filetree attempting path set to counters Signed-off-by: Alex Goodman --- pkg/file/path_set.go | 36 +++++++++++++++++++++++++++++++++ pkg/file/path_set_test.go | 42 +++++++++++++++++++++++++++++++++++++++ pkg/filetree/filetree.go | 6 +++--- 3 files changed, 81 insertions(+), 3 deletions(-) diff --git a/pkg/file/path_set.go b/pkg/file/path_set.go index a46f342a..89c923ad 100644 --- a/pkg/file/path_set.go +++ b/pkg/file/path_set.go @@ -75,3 +75,39 @@ func (s PathSet) ContainsAny(ids ...Path) bool { } return false } + +type PathCountSet map[Path]int + +func NewPathCountSet(is ...Path) PathCountSet { + s := make(PathCountSet) + s.Add(is...) + return s +} + +func (s PathCountSet) Add(ids ...Path) { + for _, i := range ids { + if _, ok := s[i]; !ok { + s[i] = 1 + continue + } + s[i] += 1 + } +} + +func (s PathCountSet) Remove(ids ...Path) { + for _, i := range ids { + if _, ok := s[i]; !ok { + continue + } + + s[i]-- + if s[i] <= 0 { + delete(s, i) + } + } +} + +func (s PathCountSet) Contains(i Path) bool { + count, ok := s[i] + return ok && count > 0 +} diff --git a/pkg/file/path_set_test.go b/pkg/file/path_set_test.go index 5d296649..1e578911 100644 --- a/pkg/file/path_set_test.go +++ b/pkg/file/path_set_test.go @@ -224,3 +224,45 @@ func TestPathSet_ContainsAny(t *testing.T) { }) } } + +func TestPathCountSet(t *testing.T) { + s := NewPathCountSet() + + s.Add("a", "b") // {a: 1, b: 1} + assert.True(t, s.Contains("a")) + assert.True(t, s.Contains("b")) + assert.False(t, s.Contains("c")) + + s.Add("a", "c") // {a: 2, b: 1, c: 1} + assert.True(t, s.Contains("a")) + assert.True(t, s.Contains("b")) + assert.True(t, s.Contains("c")) + + s.Remove("a") // {a: 1, b: 1, c: 1} + assert.True(t, s.Contains("a")) + assert.True(t, s.Contains("b")) + assert.True(t, s.Contains("c")) + + s.Remove("a", "b") // {c: 1} + assert.False(t, s.Contains("a")) + assert.False(t, s.Contains("b")) + assert.True(t, s.Contains("c")) + + s.Remove("a", "b", "v", "c") // {} + assert.False(t, s.Contains("a")) + assert.False(t, s.Contains("b")) + assert.False(t, s.Contains("c")) + + s.Add("a", "a", "a", "a") // {a: 4} + assert.True(t, s.Contains("a")) + assert.Equal(t, 4, s["a"]) + + s.Remove("a", "a", "a") // {a: 1} + assert.True(t, s.Contains("a")) + + s.Remove("a", "a", "a", "a") // {} + assert.False(t, s.Contains("a")) + + s.Remove("a", "a", "a", "a", "a", "a", "a", "a") // {} + assert.False(t, s.Contains("a")) +} diff --git a/pkg/filetree/filetree.go b/pkg/filetree/filetree.go index 9eff948f..141040c8 100644 --- a/pkg/filetree/filetree.go +++ b/pkg/filetree/filetree.go @@ -250,7 +250,7 @@ func (t *FileTree) node(p file.Path, strategy linkResolutionStrategy) (*nodeAcce // return FileNode of the basename in the given path (no resolution is done at or past the basename). Note: it is // assumed that the given path has already been normalized. -func (t *FileTree) resolveAncestorLinks(path file.Path, currentlyResolvingLinkPaths file.PathSet) (*nodeAccess, error) { +func (t *FileTree) resolveAncestorLinks(path file.Path, currentlyResolvingLinkPaths file.PathCountSet) (*nodeAccess, error) { // performance optimization... see if there is a node at the path (as if it is a real path). If so, // use it, otherwise, continue with ancestor resolution currentNodeAccess, err := t.node(path, linkResolutionStrategy{}) @@ -325,7 +325,7 @@ func (t *FileTree) resolveAncestorLinks(path file.Path, currentlyResolvingLinkPa // resolveNodeLinks takes the given FileNode and resolves all links at the base of the real path for the node (this implies // that NO ancestors are considered). // nolint: funlen -func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, currentlyResolvingLinkPaths file.PathSet) (*nodeAccess, error) { +func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, currentlyResolvingLinkPaths file.PathCountSet) (*nodeAccess, error) { if n == nil { return nil, fmt.Errorf("cannot resolve links with nil Node given") } @@ -334,7 +334,7 @@ func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, // this represents current link resolution requests that are in progress. This set is pruned once the resolution // has been completed. if currentlyResolvingLinkPaths == nil { - currentlyResolvingLinkPaths = file.NewPathSet() + currentlyResolvingLinkPaths = file.NewPathCountSet() } // note: this assumes that callers are passing paths in which the constituent parts are NOT symlinks From 4725d6a8947067a9ec7a69a7b5e56a8817a992d6 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Tue, 21 Feb 2023 17:07:11 -0500 Subject: [PATCH 06/14] remove comment Signed-off-by: Alex Goodman --- pkg/filetree/filenode/filenode.go | 1 - 1 file changed, 1 deletion(-) diff --git a/pkg/filetree/filenode/filenode.go b/pkg/filetree/filenode/filenode.go index f7811ea1..14f0fe7a 100644 --- a/pkg/filetree/filenode/filenode.go +++ b/pkg/filetree/filenode/filenode.go @@ -77,7 +77,6 @@ func (n *FileNode) RenderLinkDestination() file.Path { return "" } - // TODO: do we want to assert on if a destination exists or not here? if n.LinkPath.IsAbsolutePath() { // use links with absolute paths blindly return n.LinkPath From 1f8e0c93ede8e39872c8f117f052b03d9223f76c Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Tue, 21 Feb 2023 17:07:50 -0500 Subject: [PATCH 07/14] fix linting Signed-off-by: Alex Goodman --- pkg/file/path_set.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/file/path_set.go b/pkg/file/path_set.go index 89c923ad..f4a8084d 100644 --- a/pkg/file/path_set.go +++ b/pkg/file/path_set.go @@ -90,7 +90,7 @@ func (s PathCountSet) Add(ids ...Path) { s[i] = 1 continue } - s[i] += 1 + s[i]++ } } From 9b29c6579ba164094bcd784754367e827e3bdb34 Mon Sep 17 00:00:00 2001 From: Christopher Phillips Date: Tue, 21 Feb 2023 16:50:39 -0500 Subject: [PATCH 08/14] feat: decrement stack depth Signed-off-by: Christopher Phillips --- pkg/filetree/filetree.go | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/pkg/filetree/filetree.go b/pkg/filetree/filetree.go index 141040c8..5f993d84 100644 --- a/pkg/filetree/filetree.go +++ b/pkg/filetree/filetree.go @@ -20,6 +20,7 @@ import ( var ErrRemovingRoot = errors.New("cannot remove the root path (`/`) from the FileTree") var ErrLinkCycleDetected = errors.New("cycle during symlink resolution") +var MaxLinkDepth = 100 // FileTree represents a file/directory Tree type FileTree struct { @@ -213,7 +214,7 @@ func (t *FileTree) node(p file.Path, strategy linkResolutionStrategy) (*nodeAcce var currentNode *nodeAccess var err error if strategy.FollowAncestorLinks { - currentNode, err = t.resolveAncestorLinks(normalizedPath, nil) + currentNode, err = t.resolveAncestorLinks(normalizedPath, nil, MaxLinkDepth) if err != nil { if currentNode != nil { currentNode.RequestPath = normalizedPath @@ -239,7 +240,7 @@ func (t *FileTree) node(p file.Path, strategy linkResolutionStrategy) (*nodeAcce } if strategy.FollowBasenameLinks { - currentNode, err = t.resolveNodeLinks(currentNode, !strategy.DoNotFollowDeadBasenameLinks, nil) + currentNode, err = t.resolveNodeLinks(currentNode, !strategy.DoNotFollowDeadBasenameLinks, nil, MaxLinkDepth) } if currentNode != nil { currentNode.RequestPath = normalizedPath @@ -250,7 +251,7 @@ func (t *FileTree) node(p file.Path, strategy linkResolutionStrategy) (*nodeAcce // return FileNode of the basename in the given path (no resolution is done at or past the basename). Note: it is // assumed that the given path has already been normalized. -func (t *FileTree) resolveAncestorLinks(path file.Path, currentlyResolvingLinkPaths file.PathCountSet) (*nodeAccess, error) { +func (t *FileTree) resolveAncestorLinks(path file.Path, currentlyResolvingLinkPaths file.PathCountSet, maxLinkDepth int) (*nodeAccess, error) { // performance optimization... see if there is a node at the path (as if it is a real path). If so, // use it, otherwise, continue with ancestor resolution currentNodeAccess, err := t.node(path, linkResolutionStrategy{}) @@ -306,7 +307,7 @@ func (t *FileTree) resolveAncestorLinks(path file.Path, currentlyResolvingLinkPa // links until the next Node is resolved (or not). isLastPart := idx == len(pathParts)-1 if !isLastPart && currentNodeAccess.FileNode.IsLink() { - currentNodeAccess, err = t.resolveNodeLinks(currentNodeAccess, true, currentlyResolvingLinkPaths) + currentNodeAccess, err = t.resolveNodeLinks(currentNodeAccess, true, currentlyResolvingLinkPaths, maxLinkDepth) if err != nil { // only expected to happen on cycles return currentNodeAccess, err @@ -325,7 +326,7 @@ func (t *FileTree) resolveAncestorLinks(path file.Path, currentlyResolvingLinkPa // resolveNodeLinks takes the given FileNode and resolves all links at the base of the real path for the node (this implies // that NO ancestors are considered). // nolint: funlen -func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, currentlyResolvingLinkPaths file.PathCountSet) (*nodeAccess, error) { +func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, currentlyResolvingLinkPaths file.PathCountSet, maxLinkDepth int) (*nodeAccess, error) { if n == nil { return nil, fmt.Errorf("cannot resolve links with nil Node given") } @@ -336,6 +337,8 @@ func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, if currentlyResolvingLinkPaths == nil { currentlyResolvingLinkPaths = file.NewPathCountSet() } + // we need to short-circuit link resolution that never resolves (cycles) due to a cycle referencing + maxLinkDepth-- // note: this assumes that callers are passing paths in which the constituent parts are NOT symlinks var lastNode *nodeAccess @@ -390,10 +393,14 @@ func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, return nil, ErrLinkCycleDetected } - // get the next Node (based on the next path)a + if maxLinkDepth < 1 { + return nil, ErrLinkCycleDetected + } + + // get the next Node (based on the next path) // attempted paths maintains state across calls to resolveAncestorLinks currentlyResolvingLinkPaths.Add(nextPath) - currentNodeAccess, err = t.resolveAncestorLinks(nextPath, currentlyResolvingLinkPaths) + currentNodeAccess, err = t.resolveAncestorLinks(nextPath, currentlyResolvingLinkPaths, maxLinkDepth) if err != nil { if currentNodeAccess != nil { currentNodeAccess.LeafLinkResolution = append(currentNodeAccess.LeafLinkResolution, nodePath...) From 5dc3c1431be4bc4df65f79fa551af94616fd5eea Mon Sep 17 00:00:00 2001 From: Christopher Phillips Date: Tue, 21 Feb 2023 17:17:39 -0500 Subject: [PATCH 09/14] test: remove old wip test name Signed-off-by: Christopher Phillips --- pkg/filetree/filetree_test.go | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pkg/filetree/filetree_test.go b/pkg/filetree/filetree_test.go index fff09810..0a39b591 100644 --- a/pkg/filetree/filetree_test.go +++ b/pkg/filetree/filetree_test.go @@ -1135,6 +1135,24 @@ func TestFileTree_File_ResolutionWithMultipleAncestorResolutionsForSameNode(t *t require.NoError(t, err) exists, resolution, err := tr.File("/usr/local/bin/ksh", FollowBasenameLinks) + /* + /usr/bin/ksh93 <-- Real file + /bin -> /usr/bin + /usr/bin/ksh -> /etc/alternatives/ksh + /etc/alternatives/ksh -> /bin/ksh93 + */ + + // ls -al /usr/local/bin/ksh + // /usr/local/bin/ksh -> /bin/ksh + + // ls -al /bin/ksh + // /bin/ksh -> /etc/alternatives/ksh + + // ls -al /etc/alternatives/ksh + // /etc/alternatives/ksh -> /bin/ksh93 + + // the test.... we should not stop when a small cycle for /usr/bin is done more than once + _, _, err = tr.File("/usr/local/bin/ksh", FollowBasenameLinks) require.NoError(t, err) assert.True(t, exists) assert.True(t, resolution.HasReference()) From f7d2b176e5cdf4a8f79ce75d1aa6885e2c13cd60 Mon Sep 17 00:00:00 2001 From: Christopher Phillips Date: Wed, 22 Feb 2023 10:34:32 -0500 Subject: [PATCH 10/14] chore: style updates Signed-off-by: Christopher Phillips --- pkg/filetree/filetree_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/filetree/filetree_test.go b/pkg/filetree/filetree_test.go index 40aeb026..682fcd02 100644 --- a/pkg/filetree/filetree_test.go +++ b/pkg/filetree/filetree_test.go @@ -1135,7 +1135,7 @@ func TestFileTree_File_ResolutionWithMultipleAncestorResolutionsForSameNode(t *t require.NoError(t, err) exists, resolution, err := tr.File("/usr/local/bin/ksh", FollowBasenameLinks) - require.NoError(t, err) + require.NoError(t, err) /* /usr/bin/ksh93 <-- Real file /bin -> /usr/bin From 656df6e7aba74edee6d4e9ceca753939bcb4dc6c Mon Sep 17 00:00:00 2001 From: Christopher Phillips Date: Wed, 22 Feb 2023 10:41:25 -0500 Subject: [PATCH 11/14] feat: move maxLinkDepth decrement to inside ancestor loop Signed-off-by: Christopher Phillips --- pkg/filetree/filetree.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pkg/filetree/filetree.go b/pkg/filetree/filetree.go index 5f993d84..5af4cc20 100644 --- a/pkg/filetree/filetree.go +++ b/pkg/filetree/filetree.go @@ -337,8 +337,6 @@ func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, if currentlyResolvingLinkPaths == nil { currentlyResolvingLinkPaths = file.NewPathCountSet() } - // we need to short-circuit link resolution that never resolves (cycles) due to a cycle referencing - maxLinkDepth-- // note: this assumes that callers are passing paths in which the constituent parts are NOT symlinks var lastNode *nodeAccess @@ -393,6 +391,9 @@ func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, return nil, ErrLinkCycleDetected } + // we need to short-circuit link resolution that never resolves (cycles) due to a cycle referencing + // this is counted across all calls to resolveAncestorLinks and resolveNodeLinks + maxLinkDepth-- if maxLinkDepth < 1 { return nil, ErrLinkCycleDetected } From 26b00ab12ccb6059d6ae70f2e57c2aec2e26bb9f Mon Sep 17 00:00:00 2001 From: Christopher Phillips Date: Wed, 22 Feb 2023 11:22:21 -0500 Subject: [PATCH 12/14] feat: move maxLinkDepth decrement into resolveNodeLinks loop Signed-off-by: Christopher Phillips --- pkg/filetree/filetree.go | 8 ++++---- pkg/filetree/filetree_test.go | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/pkg/filetree/filetree.go b/pkg/filetree/filetree.go index 5af4cc20..8baaeff3 100644 --- a/pkg/filetree/filetree.go +++ b/pkg/filetree/filetree.go @@ -20,6 +20,7 @@ import ( var ErrRemovingRoot = errors.New("cannot remove the root path (`/`) from the FileTree") var ErrLinkCycleDetected = errors.New("cycle during symlink resolution") +var ErrStackDepthExceeded = errors.New("maximum link resolution stack depth exceeded") var MaxLinkDepth = 100 // FileTree represents a file/directory Tree @@ -342,7 +343,6 @@ func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, var lastNode *nodeAccess var nodePath []nodeAccess var nextPath file.Path - currentNodeAccess := n // keep resolving links until a regular file or directory is found. @@ -391,11 +391,11 @@ func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, return nil, ErrLinkCycleDetected } - // we need to short-circuit link resolution that never resolves (cycles) due to a cycle referencing - // this is counted across all calls to resolveAncestorLinks and resolveNodeLinks + // we need to short-circuit link resolution that never resolves (depth) due to a cycle referencing + // maxLinkDepth is counted across all calls to resolveAncestorLinks and resolveNodeLinks maxLinkDepth-- if maxLinkDepth < 1 { - return nil, ErrLinkCycleDetected + return nil, ErrStackDepthExceeded } // get the next Node (based on the next path) diff --git a/pkg/filetree/filetree_test.go b/pkg/filetree/filetree_test.go index 682fcd02..3b5b9e57 100644 --- a/pkg/filetree/filetree_test.go +++ b/pkg/filetree/filetree_test.go @@ -2,6 +2,7 @@ package filetree import ( "errors" + "fmt" "github.com/scylladb/go-set/strset" "testing" @@ -1160,6 +1161,23 @@ func TestFileTree_File_ResolutionWithMultipleAncestorResolutionsForSameNode(t *t assert.Equal(t, *actualRef, *resolution.Reference) } +func TestFileTree_MaxDepthLimit(t *testing.T) { + tr := New() + _, err := tr.AddFile("/usr/bin/ksh") + + for i := 0; i < 10; i++ { + _, err = tr.AddSymLink( + file.Path(fmt.Sprintf("/usr/local/ksh%d", i)), + file.Path(fmt.Sprintf("/usr/bin/ksh%d", i+1)), + ) + require.NoError(t, err) + } + _, err = tr.AddSymLink("/usr/bin/ksh10", "/usr/bin/ksh") + + _, _, err = tr.File("/usr/local/ksh0", FollowBasenameLinks) + require.NoError(t, err) +} + func TestFileTree_AllFiles(t *testing.T) { tr := New() From fa5c69b8ca8dbfc96313cc1e5b9262bfd8c6a244 Mon Sep 17 00:00:00 2001 From: Christopher Phillips Date: Wed, 22 Feb 2023 13:31:49 -0500 Subject: [PATCH 13/14] feat: move detection to top and write minimal test case Signed-off-by: Christopher Phillips --- pkg/filetree/filetree.go | 14 +++++++------- pkg/filetree/filetree_test.go | 25 +++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/pkg/filetree/filetree.go b/pkg/filetree/filetree.go index 8baaeff3..54e9595b 100644 --- a/pkg/filetree/filetree.go +++ b/pkg/filetree/filetree.go @@ -351,6 +351,13 @@ func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, realPathsVisited := strset.New() var err error for { + // we need to short-circuit link resolution that never resolves (depth) due to a cycle referencing + // maxLinkDepth is counted across all calls to resolveAncestorLinks and resolveNodeLinks + maxLinkDepth-- + if maxLinkDepth < 1 { + return nil, ErrStackDepthExceeded + } + nodePath = append(nodePath, *currentNodeAccess) // if there is no next path, return this reference (dead link) @@ -391,13 +398,6 @@ func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, return nil, ErrLinkCycleDetected } - // we need to short-circuit link resolution that never resolves (depth) due to a cycle referencing - // maxLinkDepth is counted across all calls to resolveAncestorLinks and resolveNodeLinks - maxLinkDepth-- - if maxLinkDepth < 1 { - return nil, ErrStackDepthExceeded - } - // get the next Node (based on the next path) // attempted paths maintains state across calls to resolveAncestorLinks currentlyResolvingLinkPaths.Add(nextPath) diff --git a/pkg/filetree/filetree_test.go b/pkg/filetree/filetree_test.go index 3b5b9e57..beae35c8 100644 --- a/pkg/filetree/filetree_test.go +++ b/pkg/filetree/filetree_test.go @@ -1161,6 +1161,31 @@ func TestFileTree_File_ResolutionWithMultipleAncestorResolutionsForSameNode(t *t assert.Equal(t, *actualRef, *resolution.Reference) } +func TestFileTreeMaxLinkDepth(t *testing.T) { + tr := New() + _, err := tr.AddFile("/usr/bin/ksh93") + require.NoError(t, err) + + _, err = tr.AddSymLink("/usr/local/bin/ksh", "/bin/ksh") + require.NoError(t, err) + + _, err = tr.AddSymLink("/bin", "/usr/bin") + require.NoError(t, err) + + _, err = tr.AddSymLink("/etc/alternatives/ksh", "/bin/ksh93") + require.NoError(t, err) + + _, err = tr.AddSymLink("/usr/bin/ksh", "/etc/alternatives/ksh") + require.NoError(t, err) + rs := linkResolutionStrategy{} + + currentNode, err := tr.node("/usr/local/bin/ksh", rs) + require.NoError(t, err) + + _, err = tr.resolveNodeLinks(currentNode, !rs.DoNotFollowDeadBasenameLinks, nil, 2) + require.Error(t, err) +} + func TestFileTree_MaxDepthLimit(t *testing.T) { tr := New() _, err := tr.AddFile("/usr/bin/ksh") From ad10e8a3231129fcb8d134445c237934cd656ebc Mon Sep 17 00:00:00 2001 From: Christopher Phillips Date: Wed, 22 Feb 2023 13:48:37 -0500 Subject: [PATCH 14/14] test: update linkResolution test to use internal value Signed-off-by: Christopher Phillips --- pkg/filetree/filetree.go | 10 +++++----- pkg/filetree/filetree_test.go | 27 ++++++++++++++++++++------- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/pkg/filetree/filetree.go b/pkg/filetree/filetree.go index 54e9595b..e2ea64a5 100644 --- a/pkg/filetree/filetree.go +++ b/pkg/filetree/filetree.go @@ -20,8 +20,8 @@ import ( var ErrRemovingRoot = errors.New("cannot remove the root path (`/`) from the FileTree") var ErrLinkCycleDetected = errors.New("cycle during symlink resolution") -var ErrStackDepthExceeded = errors.New("maximum link resolution stack depth exceeded") -var MaxLinkDepth = 100 +var ErrLinkResolutionDepth = errors.New("maximum link resolution stack depth exceeded") +var maxLinkResolutionDepth = 100 // FileTree represents a file/directory Tree type FileTree struct { @@ -215,7 +215,7 @@ func (t *FileTree) node(p file.Path, strategy linkResolutionStrategy) (*nodeAcce var currentNode *nodeAccess var err error if strategy.FollowAncestorLinks { - currentNode, err = t.resolveAncestorLinks(normalizedPath, nil, MaxLinkDepth) + currentNode, err = t.resolveAncestorLinks(normalizedPath, nil, maxLinkResolutionDepth) if err != nil { if currentNode != nil { currentNode.RequestPath = normalizedPath @@ -241,7 +241,7 @@ func (t *FileTree) node(p file.Path, strategy linkResolutionStrategy) (*nodeAcce } if strategy.FollowBasenameLinks { - currentNode, err = t.resolveNodeLinks(currentNode, !strategy.DoNotFollowDeadBasenameLinks, nil, MaxLinkDepth) + currentNode, err = t.resolveNodeLinks(currentNode, !strategy.DoNotFollowDeadBasenameLinks, nil, maxLinkResolutionDepth) } if currentNode != nil { currentNode.RequestPath = normalizedPath @@ -355,7 +355,7 @@ func (t *FileTree) resolveNodeLinks(n *nodeAccess, followDeadBasenameLinks bool, // maxLinkDepth is counted across all calls to resolveAncestorLinks and resolveNodeLinks maxLinkDepth-- if maxLinkDepth < 1 { - return nil, ErrStackDepthExceeded + return nil, ErrLinkResolutionDepth } nodePath = append(nodePath, *currentNodeAccess) diff --git a/pkg/filetree/filetree_test.go b/pkg/filetree/filetree_test.go index beae35c8..a6aac4a7 100644 --- a/pkg/filetree/filetree_test.go +++ b/pkg/filetree/filetree_test.go @@ -1183,24 +1183,37 @@ func TestFileTreeMaxLinkDepth(t *testing.T) { require.NoError(t, err) _, err = tr.resolveNodeLinks(currentNode, !rs.DoNotFollowDeadBasenameLinks, nil, 2) - require.Error(t, err) + require.Error(t, err, "should have gotten an error on resolution of a dead cycle") + // require certain error + if err != ErrLinkResolutionDepth { + t.Fatalf("should have gotten an ErrLinkResolutionDepth resolving a file") + } } -func TestFileTree_MaxDepthLimit(t *testing.T) { +func TestFileTree_MaximumLinkResolutionExceeded(t *testing.T) { tr := New() - _, err := tr.AddFile("/usr/bin/ksh") + ref, err := tr.AddFile("/usr/bin/ksh") - for i := 0; i < 10; i++ { + // we hard code this in filetree to 100 + for i := 0; i < maxLinkResolutionDepth; i++ { _, err = tr.AddSymLink( - file.Path(fmt.Sprintf("/usr/local/ksh%d", i)), + file.Path(fmt.Sprintf("/usr/bin/ksh%d", i)), file.Path(fmt.Sprintf("/usr/bin/ksh%d", i+1)), ) require.NoError(t, err) } - _, err = tr.AddSymLink("/usr/bin/ksh10", "/usr/bin/ksh") - _, _, err = tr.File("/usr/local/ksh0", FollowBasenameLinks) + // explicitly link 10 to ksh + _, err = tr.AddSymLink("/usr/bin/ksh100", "/usr/bin/ksh") + + // we should be short-circuiting the resolution of multiple SymLinks > 100 + _, _, err = tr.File("/usr/bin/ksh0", FollowBasenameLinks) + require.Error(t, err) + + b, fr, err := tr.File("/usr/bin/ksh90", FollowBasenameLinks) require.NoError(t, err) + assert.True(t, b) + assert.Equal(t, ref, fr.Reference) } func TestFileTree_AllFiles(t *testing.T) {