From c849112b00e91b72738b860379ca6d8ae8808f66 Mon Sep 17 00:00:00 2001 From: Kudryavcev Nikolay <35200428+Rupikz@users.noreply.github.com> Date: Fri, 14 Nov 2025 02:04:43 +0300 Subject: [PATCH 1/2] chore: migrate syft to use mholt/archives instead of anchore fork (#4029) --------- Signed-off-by: Kudryavcev Nikolay Signed-off-by: Christopher Phillips Signed-off-by: Alex Goodman --- README.md | 4 +- go.mod | 17 +- go.sum | 27 +- internal/file/tar_file_traversal.go | 41 +- internal/file/zip_file_manifest.go | 17 +- internal/file/zip_file_manifest_test.go | 7 +- internal/file/zip_file_traversal.go | 114 ++-- internal/file/zip_file_traversal_test.go | 528 +++++++++++++++++- internal/file/zip_read_closer.go | 229 -------- internal/file/zip_read_closer_test.go | 50 -- internal/task/unknowns_tasks.go | 8 +- syft/format/github/internal/model/model.go | 8 +- syft/pkg/cataloger/java/archive_parser.go | 29 +- .../pkg/cataloger/java/archive_parser_test.go | 31 +- .../java/tar_wrapped_archive_parser.go | 2 +- .../java/zip_wrapped_archive_parser.go | 2 +- syft/source/filesource/file_source.go | 67 ++- 17 files changed, 754 insertions(+), 427 deletions(-) delete mode 100644 internal/file/zip_read_closer.go delete mode 100644 internal/file/zip_read_closer_test.go diff --git a/README.md b/README.md index 43cba0a34ce..557d903bcbd 100644 --- a/README.md +++ b/README.md @@ -106,8 +106,8 @@ syft -o Where the `formats` available are: - `syft-json`: Use this to get as much information out of Syft as possible! - `syft-text`: A row-oriented, human-and-machine-friendly output. -- `cyclonedx-xml`: A XML report conforming to the [CycloneDX 1.6 specification](https://cyclonedx.org/specification/overview/). -- `cyclonedx-xml@1.5`: A XML report conforming to the [CycloneDX 1.5 specification](https://cyclonedx.org/specification/overview/). +- `cyclonedx-xml`: An XML report conforming to the [CycloneDX 1.6 specification](https://cyclonedx.org/specification/overview/). 
+- `cyclonedx-xml@1.5`: An XML report conforming to the [CycloneDX 1.5 specification](https://cyclonedx.org/specification/overview/). - `cyclonedx-json`: A JSON report conforming to the [CycloneDX 1.6 specification](https://cyclonedx.org/specification/overview/). - `cyclonedx-json@1.5`: A JSON report conforming to the [CycloneDX 1.5 specification](https://cyclonedx.org/specification/overview/). - `spdx-tag-value`: A tag-value formatted report conforming to the [SPDX 2.3 specification](https://spdx.github.io/spdx-spec/v2.3/). diff --git a/go.mod b/go.mod index 9439a4883a9..d674cc3e965 100644 --- a/go.mod +++ b/go.mod @@ -11,7 +11,6 @@ require ( github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d github.com/acobaugh/osrelease v0.1.0 github.com/adrg/xdg v0.5.3 - github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51 github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9 github.com/anchore/clio v0.0.0-20250319180342-2cfe4b0cb716 github.com/anchore/fangs v0.0.0-20250319222917-446a1e748ec2 @@ -156,7 +155,7 @@ require ( github.com/dsnet/compress v0.0.2-0.20230904184137-39efe44ab707 // indirect github.com/emirpasic/gods v1.18.1 // indirect github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect - github.com/fatih/color v1.17.0 // indirect + github.com/fatih/color v1.18.0 // indirect github.com/felixge/fgprof v0.9.5 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/fsnotify/fsnotify v1.8.0 // indirect @@ -170,7 +169,6 @@ require ( github.com/goccy/go-yaml v1.18.0 github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect - github.com/golang/snappy v0.0.4 // indirect github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e // indirect github.com/google/s2a-go v0.1.8 // indirect github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect @@ -192,7 +190,7 @@ require ( github.com/logrusorgru/aurora v2.0.3+incompatible // indirect 
github.com/lucasb-eyer/go-colorful v1.2.0 // indirect github.com/maruel/natural v1.1.1 // indirect - github.com/mattn/go-colorable v0.1.13 // indirect + github.com/mattn/go-colorable v0.1.14 // indirect github.com/mattn/go-isatty v0.0.20 // indirect github.com/mattn/go-localereader v0.0.2-0.20220822084749-2491eb6c1c75 // indirect github.com/mattn/go-runewidth v0.0.16 // indirect @@ -213,10 +211,6 @@ require ( github.com/muesli/cancelreader v0.2.2 // indirect github.com/muesli/termenv v0.16.0 // indirect github.com/ncruces/go-strftime v0.1.9 // indirect - github.com/nwaples/rardecode v1.1.3 // indirect - github.com/nwaples/rardecode/v2 v2.2.1 // indirect - github.com/olekukonko/errors v0.0.0-20250405072817-4e6d85265da6 // indirect - github.com/olekukonko/ll v0.0.8 // indirect github.com/opencontainers/image-spec v1.1.1 // indirect github.com/opencontainers/runtime-spec v1.1.0 // indirect github.com/opencontainers/selinux v1.13.1 // indirect @@ -289,6 +283,13 @@ require ( modernc.org/memory v1.11.0 // indirect ) +require ( + github.com/nwaples/rardecode/v2 v2.2.0 // indirect + github.com/olekukonko/cat v0.0.0-20250911104152-50322a0618f6 // indirect + github.com/olekukonko/errors v1.1.0 // indirect + github.com/olekukonko/ll v0.1.2 // indirect +) + retract ( v1.25.0 // published with a replace directive (confusing for API users) v0.53.2 diff --git a/go.sum b/go.sum index dbe3fee2268..8bbd58cdfbe 100644 --- a/go.sum +++ b/go.sum @@ -664,8 +664,6 @@ github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuy github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= -github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51 
h1:yhk+P8lF3ZiROjmaVRao9WGTRo4b/wYjoKEiAHWrKwc= -github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51/go.mod h1:nwuGSd7aZp0rtYt79YggCGafz1RYsclE7pi3fhLwvuw= github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9 h1:p0ZIe0htYOX284Y4axJaGBvXHU0VCCzLN5Wf5XbKStU= github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9/go.mod h1:3ZsFB9tzW3vl4gEiUeuSOMDnwroWxIxJelOOHUp8dSw= github.com/anchore/clio v0.0.0-20250319180342-2cfe4b0cb716 h1:2sIdYJlQESEnyk3Y0WD2vXWW5eD2iMz9Ev8fj1Z8LNA= @@ -913,8 +911,8 @@ github.com/facebookincubator/nvdtools v0.1.5/go.mod h1:Kh55SAWnjckS96TBSrXI99KrE github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU= github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk= -github.com/fatih/color v1.17.0 h1:GlRw1BRJxkpqUCBKzKOw098ed57fEsKeNjpTe3cSjK4= -github.com/fatih/color v1.17.0/go.mod h1:YZ7TlrGPkiz6ku9fK3TLD/pl3CpsiFyu8N92HLgmosI= +github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM= +github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU= github.com/fatih/set v0.2.1 h1:nn2CaJyknWE/6txyUDGwysr3G5QC6xWB/PtVjPBbeaA= github.com/fatih/set v0.2.1/go.mod h1:+RKtMCH+favT2+3YecHGxcc0b4KyVWA1QWWJUs4E0CI= github.com/felixge/fgprof v0.9.3/go.mod h1:RdbpDgzqYVh/T9fPELJyV7EYJuHB55UTEULNun8eiPw= @@ -1033,7 +1031,6 @@ github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiu github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= -github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/btree 
v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= @@ -1260,8 +1257,8 @@ github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVc github.com/mattn/go-colorable v0.1.6/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= github.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4= -github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= -github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= +github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE= +github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8= github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= github.com/mattn/go-isatty v0.0.10/go.mod h1:qgIWMr58cqv1PHHyhnkY9lrL7etaEgOFcMEpPG5Rm84= @@ -1342,14 +1339,14 @@ github.com/nix-community/go-nix v0.0.0-20250101154619-4bdde671e0a1 h1:kpt9ZfKcm+ github.com/nix-community/go-nix v0.0.0-20250101154619-4bdde671e0a1/go.mod h1:qgCw4bBKZX8qMgGeEZzGFVT3notl42dBjNqO2jut0M0= github.com/nsf/jsondiff v0.0.0-20210926074059-1e845ec5d249 h1:NHrXEjTNQY7P0Zfx1aMrNhpgxHmow66XQtm0aQLY0AE= github.com/nsf/jsondiff v0.0.0-20210926074059-1e845ec5d249/go.mod h1:mpRZBD8SJ55OIICQ3iWH0Yz3cjzA61JdqMLoWXeB2+8= -github.com/nwaples/rardecode v1.1.3 h1:cWCaZwfM5H7nAD6PyEdcVnczzV8i/JtotnyW/dD9lEc= -github.com/nwaples/rardecode v1.1.3/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0= -github.com/nwaples/rardecode/v2 v2.2.1 h1:DgHK/O/fkTQEKBJxBMC5d9IU8IgauifbpG78+rZJMnI= -github.com/nwaples/rardecode/v2 v2.2.1/go.mod h1:7uz379lSxPe6j9nvzxUZ+n7mnJNgjsRNb6IbvGVHRmw= 
-github.com/olekukonko/errors v0.0.0-20250405072817-4e6d85265da6 h1:r3FaAI0NZK3hSmtTDrBVREhKULp8oUeqLT5Eyl2mSPo= -github.com/olekukonko/errors v0.0.0-20250405072817-4e6d85265da6/go.mod h1:ppzxA5jBKcO1vIpCXQ9ZqgDh8iwODz6OXIGKU8r5m4Y= -github.com/olekukonko/ll v0.0.8 h1:sbGZ1Fx4QxJXEqL/6IG8GEFnYojUSQ45dJVwN2FH2fc= -github.com/olekukonko/ll v0.0.8/go.mod h1:En+sEW0JNETl26+K8eZ6/W4UQ7CYSrrgg/EdIYT2H8g= +github.com/nwaples/rardecode/v2 v2.2.0 h1:4ufPGHiNe1rYJxYfehALLjup4Ls3ck42CWwjKiOqu0A= +github.com/nwaples/rardecode/v2 v2.2.0/go.mod h1:7uz379lSxPe6j9nvzxUZ+n7mnJNgjsRNb6IbvGVHRmw= +github.com/olekukonko/cat v0.0.0-20250911104152-50322a0618f6 h1:zrbMGy9YXpIeTnGj4EljqMiZsIcE09mmF8XsD5AYOJc= +github.com/olekukonko/cat v0.0.0-20250911104152-50322a0618f6/go.mod h1:rEKTHC9roVVicUIfZK7DYrdIoM0EOr8mK1Hj5s3JjH0= +github.com/olekukonko/errors v1.1.0 h1:RNuGIh15QdDenh+hNvKrJkmxxjV4hcS50Db478Ou5sM= +github.com/olekukonko/errors v1.1.0/go.mod h1:ppzxA5jBKcO1vIpCXQ9ZqgDh8iwODz6OXIGKU8r5m4Y= +github.com/olekukonko/ll v0.1.2 h1:lkg/k/9mlsy0SxO5aC+WEpbdT5K83ddnNhAepz7TQc0= +github.com/olekukonko/ll v0.1.2/go.mod h1:b52bVQRRPObe+yyBl0TxNfhesL0nedD4Cht0/zx55Ew= github.com/olekukonko/tablewriter v1.0.7 h1:HCC2e3MM+2g72M81ZcJU11uciw6z/p82aEnm4/ySDGw= github.com/olekukonko/tablewriter v1.0.7/go.mod h1:H428M+HzoUXC6JU2Abj9IT9ooRmdq9CxuDmKMtrOCMs= github.com/onsi/gomega v1.34.1 h1:EUMJIKUjM8sKjYbtxQI9A4z2o+rruxnzNvpknOXie6k= diff --git a/internal/file/tar_file_traversal.go b/internal/file/tar_file_traversal.go index 7d211168a1e..c3511a1cc29 100644 --- a/internal/file/tar_file_traversal.go +++ b/internal/file/tar_file_traversal.go @@ -1,17 +1,40 @@ package file import ( + "context" "fmt" "os" "path/filepath" "github.com/bmatcuk/doublestar/v4" + "github.com/mholt/archives" - "github.com/anchore/archiver/v3" + "github.com/anchore/syft/internal" ) +// TraverseFilesInTar enumerates all paths stored within a tar archive using the visitor pattern. 
+func TraverseFilesInTar(ctx context.Context, archivePath string, visitor archives.FileHandler) error { + tarReader, err := os.Open(archivePath) + if err != nil { + return fmt.Errorf("unable to open tar archive (%s): %w", archivePath, err) + } + defer internal.CloseAndLogError(tarReader, archivePath) + + format, _, err := archives.Identify(ctx, archivePath, nil) + if err != nil { + return fmt.Errorf("failed to identify tar compression format: %w", err) + } + + extractor, ok := format.(archives.Extractor) + if !ok { + return fmt.Errorf("file format does not support extraction: %s", archivePath) + } + + return extractor.Extract(ctx, tarReader, visitor) +} + // ExtractGlobsFromTarToUniqueTempFile extracts paths matching the given globs within the given archive to a temporary directory, returning file openers for each file extracted. -func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...string) (map[string]Opener, error) { +func ExtractGlobsFromTarToUniqueTempFile(ctx context.Context, archivePath, dir string, globs ...string) (map[string]Opener, error) { results := make(map[string]Opener) // don't allow for full traversal, only select traversal from given paths @@ -19,9 +42,7 @@ func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...strin return results, nil } - visitor := func(file archiver.File) error { - defer file.Close() - + visitor := func(_ context.Context, file archives.FileInfo) error { // ignore directories if file.IsDir() { return nil @@ -43,7 +64,13 @@ func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...strin // provides a ReadCloser. It is up to the caller to handle closing the file explicitly. 
defer tempFile.Close() - if err := safeCopy(tempFile, file.ReadCloser); err != nil { + packedFile, err := file.Open() + if err != nil { + return fmt.Errorf("unable to read file=%q from tar=%q: %w", file.NameInArchive, archivePath, err) + } + defer internal.CloseAndLogError(packedFile, archivePath) + + if err := safeCopy(tempFile, packedFile); err != nil { return fmt.Errorf("unable to copy source=%q for tar=%q: %w", file.Name(), archivePath, err) } @@ -52,7 +79,7 @@ func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...strin return nil } - return results, archiver.Walk(archivePath, visitor) + return results, TraverseFilesInTar(ctx, archivePath, visitor) } func matchesAnyGlob(name string, globs ...string) bool { diff --git a/internal/file/zip_file_manifest.go b/internal/file/zip_file_manifest.go index 346e661c61f..8dcb0d2f224 100644 --- a/internal/file/zip_file_manifest.go +++ b/internal/file/zip_file_manifest.go @@ -1,10 +1,12 @@ package file import ( + "context" "os" "sort" "strings" + "github.com/mholt/archives" "github.com/scylladb/go-set/strset" "github.com/anchore/syft/internal/log" @@ -14,22 +16,25 @@ import ( type ZipFileManifest map[string]os.FileInfo // NewZipFileManifest creates and returns a new ZipFileManifest populated with path and metadata from the given zip archive path. 
-func NewZipFileManifest(archivePath string) (ZipFileManifest, error) { - zipReader, err := OpenZip(archivePath) +func NewZipFileManifest(ctx context.Context, archivePath string) (ZipFileManifest, error) { + zipReader, err := os.Open(archivePath) manifest := make(ZipFileManifest) if err != nil { log.Debugf("unable to open zip archive (%s): %v", archivePath, err) return manifest, err } defer func() { - err = zipReader.Close() - if err != nil { + if err = zipReader.Close(); err != nil { log.Debugf("unable to close zip archive (%s): %+v", archivePath, err) } }() - for _, file := range zipReader.File { - manifest.Add(file.Name, file.FileInfo()) + err = archives.Zip{}.Extract(ctx, zipReader, func(_ context.Context, file archives.FileInfo) error { + manifest.Add(file.NameInArchive, file.FileInfo) + return nil + }) + if err != nil { + return manifest, err } return manifest, nil } diff --git a/internal/file/zip_file_manifest_test.go b/internal/file/zip_file_manifest_test.go index 75d4452288f..9ebe4222416 100644 --- a/internal/file/zip_file_manifest_test.go +++ b/internal/file/zip_file_manifest_test.go @@ -4,6 +4,7 @@ package file import ( + "context" "encoding/json" "os" "path" @@ -24,7 +25,7 @@ func TestNewZipFileManifest(t *testing.T) { archiveFilePath := setupZipFileTest(t, sourceDirPath, false) - actual, err := NewZipFileManifest(archiveFilePath) + actual, err := NewZipFileManifest(context.Background(), archiveFilePath) if err != nil { t.Fatalf("unable to extract from unzip archive: %+v", err) } @@ -59,7 +60,7 @@ func TestNewZip64FileManifest(t *testing.T) { sourceDirPath := path.Join(cwd, "test-fixtures", "zip-source") archiveFilePath := setupZipFileTest(t, sourceDirPath, true) - actual, err := NewZipFileManifest(archiveFilePath) + actual, err := NewZipFileManifest(context.Background(), archiveFilePath) if err != nil { t.Fatalf("unable to extract from unzip archive: %+v", err) } @@ -99,7 +100,7 @@ func TestZipFileManifest_GlobMatch(t *testing.T) { archiveFilePath := 
setupZipFileTest(t, sourceDirPath, false) - z, err := NewZipFileManifest(archiveFilePath) + z, err := NewZipFileManifest(context.Background(), archiveFilePath) if err != nil { t.Fatalf("unable to extract from unzip archive: %+v", err) } diff --git a/internal/file/zip_file_traversal.go b/internal/file/zip_file_traversal.go index 1b712eff5c4..5fc26a2202d 100644 --- a/internal/file/zip_file_traversal.go +++ b/internal/file/zip_file_traversal.go @@ -1,13 +1,15 @@ package file import ( - "archive/zip" "bytes" + "context" "fmt" "os" "path/filepath" "strings" + "github.com/mholt/archives" + "github.com/anchore/syft/internal/log" ) @@ -25,7 +27,7 @@ type errZipSlipDetected struct { } func (e *errZipSlipDetected) Error() string { - return fmt.Sprintf("paths are not allowed to resolve outside of the root prefix (%q). Destination: %q", e.Prefix, e.JoinArgs) + return fmt.Sprintf("path traversal detected: paths are not allowed to resolve outside of the root prefix (%q). Destination: %q", e.Prefix, e.JoinArgs) } type zipTraversalRequest map[string]struct{} @@ -39,38 +41,34 @@ func newZipTraverseRequest(paths ...string) zipTraversalRequest { } // TraverseFilesInZip enumerates all paths stored within a zip archive using the visitor pattern. -func TraverseFilesInZip(archivePath string, visitor func(*zip.File) error, paths ...string) error { +func TraverseFilesInZip(ctx context.Context, archivePath string, visitor archives.FileHandler, paths ...string) error { request := newZipTraverseRequest(paths...) 
- zipReader, err := OpenZip(archivePath) + zipReader, err := os.Open(archivePath) if err != nil { return fmt.Errorf("unable to open zip archive (%s): %w", archivePath, err) } defer func() { - err = zipReader.Close() - if err != nil { + if err := zipReader.Close(); err != nil { log.Errorf("unable to close zip archive (%s): %+v", archivePath, err) } }() - for _, file := range zipReader.File { + return archives.Zip{}.Extract(ctx, zipReader, func(ctx context.Context, file archives.FileInfo) error { // if no paths are given then assume that all files should be traversed if len(paths) > 0 { - if _, ok := request[file.Name]; !ok { + if _, ok := request[file.NameInArchive]; !ok { // this file path is not of interest - continue + return nil } } - if err = visitor(file); err != nil { - return err - } - } - return nil + return visitor(ctx, file) + }) } // ExtractFromZipToUniqueTempFile extracts select paths for the given archive to a temporary directory, returning file openers for each file extracted. 
-func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (map[string]Opener, error) { +func ExtractFromZipToUniqueTempFile(ctx context.Context, archivePath, dir string, paths ...string) (map[string]Opener, error) { results := make(map[string]Opener) // don't allow for full traversal, only select traversal from given paths @@ -78,9 +76,8 @@ func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (m return results, nil } - visitor := func(file *zip.File) error { - tempfilePrefix := filepath.Base(filepath.Clean(file.Name)) + "-" - + visitor := func(_ context.Context, file archives.FileInfo) error { + tempfilePrefix := filepath.Base(filepath.Clean(file.NameInArchive)) + "-" tempFile, err := os.CreateTemp(dir, tempfilePrefix) if err != nil { return fmt.Errorf("unable to create temp file: %w", err) @@ -92,33 +89,32 @@ func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (m zippedFile, err := file.Open() if err != nil { - return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.Name, archivePath, err) + return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.NameInArchive, archivePath, err) } defer func() { - err := zippedFile.Close() - if err != nil { - log.Errorf("unable to close source file=%q from zip=%q: %+v", file.Name, archivePath, err) + if err := zippedFile.Close(); err != nil { + log.Errorf("unable to close source file=%q from zip=%q: %+v", file.NameInArchive, archivePath, err) } }() - if file.FileInfo().IsDir() { - return fmt.Errorf("unable to extract directories, only files: %s", file.Name) + if file.IsDir() { + return fmt.Errorf("unable to extract directories, only files: %s", file.NameInArchive) } if err := safeCopy(tempFile, zippedFile); err != nil { - return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.Name, archivePath, err) + return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.NameInArchive, archivePath, err) } - results[file.Name] = 
Opener{path: tempFile.Name()} + results[file.NameInArchive] = Opener{path: tempFile.Name()} return nil } - return results, TraverseFilesInZip(archivePath, visitor, paths...) + return results, TraverseFilesInZip(ctx, archivePath, visitor, paths...) } // ContentsFromZip extracts select paths for the given archive and returns a set of string contents for each path. -func ContentsFromZip(archivePath string, paths ...string) (map[string]string, error) { +func ContentsFromZip(ctx context.Context, archivePath string, paths ...string) (map[string]string, error) { results := make(map[string]string) // don't allow for full traversal, only select traversal from given paths @@ -126,37 +122,38 @@ func ContentsFromZip(archivePath string, paths ...string) (map[string]string, er return results, nil } - visitor := func(file *zip.File) error { + visitor := func(_ context.Context, file archives.FileInfo) error { zippedFile, err := file.Open() if err != nil { - return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.Name, archivePath, err) + return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.NameInArchive, archivePath, err) } + defer func() { + if err := zippedFile.Close(); err != nil { + log.Errorf("unable to close source file=%q from zip=%q: %+v", file.NameInArchive, archivePath, err) + } + }() - if file.FileInfo().IsDir() { - return fmt.Errorf("unable to extract directories, only files: %s", file.Name) + if file.IsDir() { + return fmt.Errorf("unable to extract directories, only files: %s", file.NameInArchive) } var buffer bytes.Buffer if err := safeCopy(&buffer, zippedFile); err != nil { - return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.Name, archivePath, err) + return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.NameInArchive, archivePath, err) } - results[file.Name] = buffer.String() + results[file.NameInArchive] = buffer.String() - err = zippedFile.Close() - if err != nil { - return fmt.Errorf("unable to close source file=%q 
from zip=%q: %w", file.Name, archivePath, err) - } return nil } - return results, TraverseFilesInZip(archivePath, visitor, paths...) + return results, TraverseFilesInZip(ctx, archivePath, visitor, paths...) } // UnzipToDir extracts a zip archive to a target directory. -func UnzipToDir(archivePath, targetDir string) error { - visitor := func(file *zip.File) error { - joinedPath, err := safeJoin(targetDir, file.Name) +func UnzipToDir(ctx context.Context, archivePath, targetDir string) error { + visitor := func(_ context.Context, file archives.FileInfo) error { + joinedPath, err := SafeJoin(targetDir, file.NameInArchive) if err != nil { return err } @@ -164,11 +161,11 @@ func UnzipToDir(archivePath, targetDir string) error { return extractSingleFile(file, joinedPath, archivePath) } - return TraverseFilesInZip(archivePath, visitor) + return TraverseFilesInZip(ctx, archivePath, visitor) } -// safeJoin ensures that any destinations do not resolve to a path above the prefix path. -func safeJoin(prefix string, dest ...string) (string, error) { +// SafeJoin ensures that any destinations do not resolve to a path above the prefix path. +func SafeJoin(prefix string, dest ...string) (string, error) { joinResult := filepath.Join(append([]string{prefix}, dest...)...) 
cleanJoinResult := filepath.Clean(joinResult) if !strings.HasPrefix(cleanJoinResult, filepath.Clean(prefix)) { @@ -181,13 +178,18 @@ func safeJoin(prefix string, dest ...string) (string, error) { return joinResult, nil } -func extractSingleFile(file *zip.File, expandedFilePath, archivePath string) error { +func extractSingleFile(file archives.FileInfo, expandedFilePath, archivePath string) error { zippedFile, err := file.Open() if err != nil { - return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.Name, archivePath, err) + return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.NameInArchive, archivePath, err) } + defer func() { + if err := zippedFile.Close(); err != nil { + log.Errorf("unable to close source file=%q from zip=%q: %+v", file.NameInArchive, archivePath, err) + } + }() - if file.FileInfo().IsDir() { + if file.IsDir() { err = os.MkdirAll(expandedFilePath, file.Mode()) if err != nil { return fmt.Errorf("unable to create dir=%q from zip=%q: %w", expandedFilePath, archivePath, err) @@ -202,20 +204,16 @@ func extractSingleFile(file *zip.File, expandedFilePath, archivePath string) err if err != nil { return fmt.Errorf("unable to create dest file=%q from zip=%q: %w", expandedFilePath, archivePath, err) } + defer func() { + if err := outputFile.Close(); err != nil { + log.Errorf("unable to close dest file=%q from zip=%q: %+v", outputFile.Name(), archivePath, err) + } + }() if err := safeCopy(outputFile, zippedFile); err != nil { - return fmt.Errorf("unable to copy source=%q to dest=%q for zip=%q: %w", file.Name, outputFile.Name(), archivePath, err) - } - - err = outputFile.Close() - if err != nil { - return fmt.Errorf("unable to close dest file=%q from zip=%q: %w", outputFile.Name(), archivePath, err) + return fmt.Errorf("unable to copy source=%q to dest=%q for zip=%q: %w", file.NameInArchive, outputFile.Name(), archivePath, err) } } - err = zippedFile.Close() - if err != nil { - return fmt.Errorf("unable to close source file=%q from 
zip=%q: %w", file.Name, archivePath, err) - } return nil } diff --git a/internal/file/zip_file_traversal_test.go b/internal/file/zip_file_traversal_test.go index d5a81d27309..812f5e4504d 100644 --- a/internal/file/zip_file_traversal_test.go +++ b/internal/file/zip_file_traversal_test.go @@ -4,6 +4,8 @@ package file import ( + "archive/zip" + "context" "crypto/sha256" "encoding/json" "errors" @@ -17,6 +19,7 @@ import ( "github.com/go-test/deep" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func equal(r1, r2 io.Reader) (bool, error) { @@ -55,7 +58,7 @@ func TestUnzipToDir(t *testing.T) { expectedPaths := len(expectedZipArchiveEntries) observedPaths := 0 - err = UnzipToDir(archiveFilePath, unzipDestinationDir) + err = UnzipToDir(context.Background(), archiveFilePath, unzipDestinationDir) if err != nil { t.Fatalf("unable to unzip archive: %+v", err) } @@ -145,7 +148,7 @@ func TestContentsFromZip(t *testing.T) { paths = append(paths, p) } - actual, err := ContentsFromZip(archivePath, paths...) + actual, err := ContentsFromZip(context.Background(), archivePath, paths...) if err != nil { t.Fatalf("unable to extract from unzip archive: %+v", err) } @@ -307,9 +310,528 @@ func TestSafeJoin(t *testing.T) { for _, test := range tests { t.Run(fmt.Sprintf("%+v:%+v", test.prefix, test.args), func(t *testing.T) { - actual, err := safeJoin(test.prefix, test.args...) + actual, err := SafeJoin(test.prefix, test.args...) test.errAssertion(t, err) assert.Equal(t, test.expected, actual) }) } } + +// TestSymlinkProtection demonstrates that SafeJoin protects against symlink-based +// directory traversal attacks by validating that archive entry paths cannot escape +// the extraction directory. 
+func TestSafeJoin_SymlinkProtection(t *testing.T) { + tests := []struct { + name string + archivePath string // Path as it would appear in the archive + expectError bool + description string + }{ + { + name: "path traversal via ../", + archivePath: "../../../outside/file.txt", + expectError: true, + description: "Archive entry with ../ trying to escape extraction dir", + }, + { + name: "absolute path symlink target", + archivePath: "../../../sensitive.txt", + expectError: true, + description: "Simulates symlink pointing outside via relative path", + }, + { + name: "safe relative path within extraction dir", + archivePath: "subdir/safe.txt", + expectError: false, + description: "Normal file path that stays within extraction directory", + }, + { + name: "safe path with internal ../", + archivePath: "dir1/../dir2/file.txt", + expectError: false, + description: "Path with ../ that still resolves within extraction dir", + }, + { + name: "deeply nested traversal", + archivePath: "../../../../../../tmp/evil.txt", + expectError: true, + description: "Multiple levels of ../ trying to escape", + }, + { + name: "single parent directory escape", + archivePath: "../", + expectError: true, + description: "Simple one-level escape attempt", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create temp directories to simulate extraction scenario + tmpDir := t.TempDir() + extractDir := filepath.Join(tmpDir, "extract") + outsideDir := filepath.Join(tmpDir, "outside") + + require.NoError(t, os.MkdirAll(extractDir, 0755)) + require.NoError(t, os.MkdirAll(outsideDir, 0755)) + + // Create a file outside extraction dir that an attacker might target + outsideFile := filepath.Join(outsideDir, "sensitive.txt") + require.NoError(t, os.WriteFile(outsideFile, []byte("sensitive data"), 0644)) + + // Test SafeJoin - this is what happens when processing archive entries + result, err := SafeJoin(extractDir, tt.archivePath) + + if tt.expectError { + // Should block 
malicious paths + require.Error(t, err, "Expected SafeJoin to reject malicious path") + var zipSlipErr *errZipSlipDetected + assert.ErrorAs(t, err, &zipSlipErr, "Error should be errZipSlipDetected type") + assert.Empty(t, result, "Result should be empty for blocked paths") + } else { + // Should allow safe paths + require.NoError(t, err, "Expected SafeJoin to allow safe path") + assert.NotEmpty(t, result, "Result should not be empty for safe paths") + assert.True(t, strings.HasPrefix(filepath.Clean(result), filepath.Clean(extractDir)), + "Safe path should resolve within extraction directory") + } + }) + } +} + +// TestUnzipToDir_SymlinkAttacks tests UnzipToDir function with malicious ZIP archives +// containing symlink entries that attempt path traversal attacks. +// +// EXPECTED BEHAVIOR: UnzipToDir should either: +// 1. Detect and reject symlinks explicitly with a security error, OR +// 2. Extract them safely (library converts symlinks to regular files) +func TestUnzipToDir_SymlinkAttacks(t *testing.T) { + tests := []struct { + name string + symlinkName string + fileName string + errContains string + }{ + { + name: "direct symlink to outside directory", + symlinkName: "evil_link", + fileName: "evil_link/payload.txt", + errContains: "not a directory", // attempt to write through symlink leaf (which is not a directory) + }, + { + name: "directory symlink attack", + symlinkName: "safe_dir/link", + fileName: "safe_dir/link/payload.txt", + errContains: "not a directory", // attempt to write through symlink (which is not a directory) + }, + { + name: "symlink without payload file", + symlinkName: "standalone_link", + fileName: "", // no payload file + errContains: "", // no error expected, symlink without payload is safe + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tempDir := t.TempDir() + + // create outside target directory + outsideDir := filepath.Join(tempDir, "outside_target") + require.NoError(t, os.MkdirAll(outsideDir, 0755)) + 
+ // create extraction directory + extractDir := filepath.Join(tempDir, "extract") + require.NoError(t, os.MkdirAll(extractDir, 0755)) + + maliciousZip := createMaliciousZipWithSymlink(t, tempDir, tt.symlinkName, outsideDir, tt.fileName) + + err := UnzipToDir(context.Background(), maliciousZip, extractDir) + + // check error expectations + if tt.errContains != "" { + require.Error(t, err) + require.Contains(t, err.Error(), tt.errContains) + } else { + require.NoError(t, err) + } + + analyzeExtractionDirectory(t, extractDir) + + // check if payload file escaped extraction directory + if tt.fileName != "" { + maliciousFile := filepath.Join(outsideDir, filepath.Base(tt.fileName)) + checkFileOutsideExtraction(t, maliciousFile) + } + + // check if symlink was created pointing outside + symlinkPath := filepath.Join(extractDir, tt.symlinkName) + checkSymlinkCreation(t, symlinkPath, extractDir, outsideDir) + }) + } +} + +// TestContentsFromZip_SymlinkAttacks tests the ContentsFromZip function with malicious +// ZIP archives containing symlink entries. +// +// EXPECTED BEHAVIOR: ContentsFromZip should either: +// 1. Reject symlinks explicitly, OR +// 2. Return empty content for symlinks (library behavior) +// +// Though ContentsFromZip doesn't write to disk, but if symlinks are followed, it could read sensitive +// files from outside the archive. 
+func TestContentsFromZip_SymlinkAttacks(t *testing.T) { + tests := []struct { + name string + symlinkName string + symlinkTarget string + requestPath string + errContains string + }{ + { + name: "request symlink entry directly", + symlinkName: "evil_link", + symlinkTarget: "/etc/hosts", // attempt to read sensitive file + requestPath: "evil_link", + errContains: "", // no error expected - library returns symlink metadata + }, + { + name: "symlink in nested directory", + symlinkName: "nested/link", + symlinkTarget: "/etc/hosts", + requestPath: "nested/link", + errContains: "", // no error expected - library returns symlink metadata + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tempDir := t.TempDir() + + // create malicious ZIP with symlink entry (no payload file needed) + maliciousZip := createMaliciousZipWithSymlink(t, tempDir, tt.symlinkName, tt.symlinkTarget, "") + + contents, err := ContentsFromZip(context.Background(), maliciousZip, tt.requestPath) + + // check error expectations + if tt.errContains != "" { + require.Error(t, err) + require.Contains(t, err.Error(), tt.errContains) + return + } + require.NoError(t, err) + + // verify symlink handling - library should return symlink target as content (metadata) + content, found := contents[tt.requestPath] + require.True(t, found, "symlink entry should be found in results") + + // verify symlink was NOT followed (content should be target path or empty) + if content != "" && content != tt.symlinkTarget { + // content is not empty and not the symlink target - check if actual file was read + if _, statErr := os.Stat(tt.symlinkTarget); statErr == nil { + targetContent, readErr := os.ReadFile(tt.symlinkTarget) + if readErr == nil && string(targetContent) == content { + t.Errorf("critical issue!... 
symlink was FOLLOWED and external file content was read!") + t.Logf(" symlink: %s → %s", tt.requestPath, tt.symlinkTarget) + t.Logf(" content length: %d bytes", len(content)) + } + } + } + }) + } +} + +// TestExtractFromZipToUniqueTempFile_SymlinkAttacks tests the ExtractFromZipToUniqueTempFile +// function with malicious ZIP archives containing symlink entries. +// +// EXPECTED BEHAVIOR: ExtractFromZipToUniqueTempFile should either: +// 1. Reject symlinks explicitly, OR +// 2. Extract them safely (library converts to empty files, filepath.Base sanitizes names) +// +// This function uses filepath.Base() on the archive entry name for temp file prefix and +// os.CreateTemp() which creates files in the specified directory, so it should be protected. +func TestExtractFromZipToUniqueTempFile_SymlinkAttacks(t *testing.T) { + tests := []struct { + name string + symlinkName string + symlinkTarget string + requestPath string + errContains string + }{ + { + name: "extract symlink entry to temp file", + symlinkName: "evil_link", + symlinkTarget: "/etc/passwd", + requestPath: "evil_link", + errContains: "", // no error expected - library extracts symlink metadata + }, + { + name: "extract nested symlink", + symlinkName: "nested/dir/link", + symlinkTarget: "/tmp/outside", + requestPath: "nested/dir/link", + errContains: "", // no error expected + }, + { + name: "extract path traversal symlink name", + symlinkName: "../../escape", + symlinkTarget: "/tmp/outside", + requestPath: "../../escape", + errContains: "", // no error expected - filepath.Base sanitizes name + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tempDir := t.TempDir() + + maliciousZip := createMaliciousZipWithSymlink(t, tempDir, tt.symlinkName, tt.symlinkTarget, "") + + // create temp directory for extraction + extractTempDir := filepath.Join(tempDir, "temp_extract") + require.NoError(t, os.MkdirAll(extractTempDir, 0755)) + + openers, err := 
ExtractFromZipToUniqueTempFile(context.Background(), maliciousZip, extractTempDir, tt.requestPath) + + // check error expectations + if tt.errContains != "" { + require.Error(t, err) + require.Contains(t, err.Error(), tt.errContains) + return + } + require.NoError(t, err) + + // verify symlink was extracted + opener, found := openers[tt.requestPath] + require.True(t, found, "symlink entry should be extracted") + + // verify temp file is within temp directory + tempFilePath := opener.path + cleanTempDir := filepath.Clean(extractTempDir) + cleanTempFile := filepath.Clean(tempFilePath) + require.True(t, strings.HasPrefix(cleanTempFile, cleanTempDir), + "temp file must be within temp directory: %s not in %s", cleanTempFile, cleanTempDir) + + // verify symlink was NOT followed (content should be target path or empty) + f, openErr := opener.Open() + require.NoError(t, openErr) + defer f.Close() + + content, readErr := io.ReadAll(f) + require.NoError(t, readErr) + + // check if symlink was followed (content matches actual file) + if len(content) > 0 && string(content) != tt.symlinkTarget { + if _, statErr := os.Stat(tt.symlinkTarget); statErr == nil { + targetContent, readErr := os.ReadFile(tt.symlinkTarget) + if readErr == nil && string(targetContent) == string(content) { + t.Errorf("critical issue!... symlink was FOLLOWED and external file content was copied!") + t.Logf(" symlink: %s → %s", tt.requestPath, tt.symlinkTarget) + t.Logf(" content length: %d bytes", len(content)) + } + } + } + }) + } +} + +// forensicFindings contains the results of analyzing an extraction directory +type forensicFindings struct { + symlinksFound []forensicSymlink + regularFiles []string + directories []string + symlinkVulnerabilities []string +} + +type forensicSymlink struct { + path string + target string + escapesExtraction bool + resolvedPath string +} + +// analyzeExtractionDirectory walks the extraction directory and detects symlinks that point +// outside the extraction directory. 
It is silent unless vulnerabilities are found. +func analyzeExtractionDirectory(t *testing.T, extractDir string) forensicFindings { + t.Helper() + + findings := forensicFindings{} + + filepath.Walk(extractDir, func(path string, info os.FileInfo, err error) error { + if err != nil { + // only log if there's an error walking the directory + t.Logf("Error walking %s: %v", path, err) + return nil + } + + relPath := strings.TrimPrefix(path, extractDir+"/") + if relPath == "" { + relPath = "." + } + + // use Lstat to detect symlinks without following them + linfo, lerr := os.Lstat(path) + if lerr == nil && linfo.Mode()&os.ModeSymlink != 0 { + target, _ := os.Readlink(path) + + // resolve to see where it actually points + var resolvedPath string + var escapesExtraction bool + + if filepath.IsAbs(target) { + // absolute symlink + resolvedPath = target + cleanExtractDir := filepath.Clean(extractDir) + escapesExtraction = !strings.HasPrefix(filepath.Clean(target), cleanExtractDir) + + if escapesExtraction { + t.Errorf("critical issue!... absolute symlink created: %s → %s", relPath, target) + t.Logf(" this symlink points outside the extraction directory") + findings.symlinkVulnerabilities = append(findings.symlinkVulnerabilities, + fmt.Sprintf("absolute symlink: %s → %s", relPath, target)) + } + } else { + // relative symlink - resolve it + resolvedPath = filepath.Join(filepath.Dir(path), target) + cleanResolved := filepath.Clean(resolvedPath) + cleanExtractDir := filepath.Clean(extractDir) + + escapesExtraction = !strings.HasPrefix(cleanResolved, cleanExtractDir) + + if escapesExtraction { + t.Errorf("critical issue!... 
symlink escapes extraction dir: %s → %s", relPath, target) + t.Logf(" symlink resolves to: %s (outside extraction directory)", cleanResolved) + findings.symlinkVulnerabilities = append(findings.symlinkVulnerabilities, + fmt.Sprintf("relative symlink escape: %s → %s (resolves to %s)", relPath, target, cleanResolved)) + } + } + + findings.symlinksFound = append(findings.symlinksFound, forensicSymlink{ + path: relPath, + target: target, + escapesExtraction: escapesExtraction, + resolvedPath: resolvedPath, + }) + } else { + // regular file or directory - collect silently + if info.IsDir() { + findings.directories = append(findings.directories, relPath) + } else { + findings.regularFiles = append(findings.regularFiles, relPath) + } + } + return nil + }) + + return findings +} + +// checkFileOutsideExtraction checks if a file was written outside the extraction directory. +// Returns true if the file exists (vulnerability), false otherwise. Silent on success. +func checkFileOutsideExtraction(t *testing.T, filePath string) bool { + t.Helper() + + if stat, err := os.Stat(filePath); err == nil { + content, _ := os.ReadFile(filePath) + t.Errorf("critical issue!... file written OUTSIDE extraction directory!") + t.Logf(" location: %s", filePath) + t.Logf(" size: %d bytes", stat.Size()) + t.Logf(" content: %s", string(content)) + t.Logf(" ...this means an attacker can write files to arbitrary locations on the filesystem") + return true + } + // no file found outside extraction directory... + return false +} + +// checkSymlinkCreation verifies if a symlink was created at the expected path and reports +// whether it points outside the extraction directory. Silent unless a symlink is found. 
+func checkSymlinkCreation(t *testing.T, symlinkPath, extractDir, expectedTarget string) bool { + t.Helper() + + if linfo, err := os.Lstat(symlinkPath); err == nil { + if linfo.Mode()&os.ModeSymlink != 0 { + target, _ := os.Readlink(symlinkPath) + + if expectedTarget != "" && target == expectedTarget { + t.Errorf("critical issue!... symlink pointing outside extraction dir was created!") + t.Logf(" Symlink: %s → %s", symlinkPath, target) + return true + } + + // Check if it escapes even if target doesn't match expected + if filepath.IsAbs(target) { + cleanExtractDir := filepath.Clean(extractDir) + if !strings.HasPrefix(filepath.Clean(target), cleanExtractDir) { + t.Errorf("critical issue!... absolute symlink escapes extraction dir!") + t.Logf(" symlink: %s → %s", symlinkPath, target) + return true + } + } + } + // if it exists but is not a symlink, that's good (attack was thwarted)... + } + + return false +} + +// createMaliciousZipWithSymlink creates a ZIP archive containing a symlink entry pointing to an arbitrary target, +// followed by a file entry that attempts to write through that symlink. +// returns the path to the created ZIP archive. +func createMaliciousZipWithSymlink(t *testing.T, tempDir, symlinkName, symlinkTarget, fileName string) string { + t.Helper() + + maliciousZip := filepath.Join(tempDir, "malicious.zip") + zipFile, err := os.Create(maliciousZip) + require.NoError(t, err) + defer zipFile.Close() + + zw := zip.NewWriter(zipFile) + + // create parent directories if the symlink is nested + if dir := filepath.Dir(symlinkName); dir != "." 
{ + dirHeader := &zip.FileHeader{ + Name: dir + "/", + Method: zip.Store, + } + dirHeader.SetMode(os.ModeDir | 0755) + _, err = zw.CreateHeader(dirHeader) + require.NoError(t, err) + } + + // create symlink entry pointing outside extraction directory + // note: ZIP format stores symlinks as regular files with the target path as content + symlinkHeader := &zip.FileHeader{ + Name: symlinkName, + Method: zip.Store, + } + symlinkHeader.SetMode(os.ModeSymlink | 0755) + + symlinkWriter, err := zw.CreateHeader(symlinkHeader) + require.NoError(t, err) + + // write the symlink target as the file content (this is how ZIP stores symlinks) + _, err = symlinkWriter.Write([]byte(symlinkTarget)) + require.NoError(t, err) + + // create file entry that will be written through the symlink + if fileName != "" { + payloadContent := []byte("MALICIOUS PAYLOAD - This should NOT be written outside extraction dir!") + payloadHeader := &zip.FileHeader{ + Name: fileName, + Method: zip.Deflate, + } + payloadHeader.SetMode(0644) + + payloadWriter, err := zw.CreateHeader(payloadHeader) + require.NoError(t, err) + + _, err = payloadWriter.Write(payloadContent) + require.NoError(t, err) + } + + require.NoError(t, zw.Close()) + require.NoError(t, zipFile.Close()) + + return maliciousZip +} diff --git a/internal/file/zip_read_closer.go b/internal/file/zip_read_closer.go deleted file mode 100644 index fd45f52a12b..00000000000 --- a/internal/file/zip_read_closer.go +++ /dev/null @@ -1,229 +0,0 @@ -package file - -import ( - "archive/zip" - "encoding/binary" - "errors" - "fmt" - "io" - "math" - "os" - - "github.com/anchore/syft/internal/log" -) - -// directoryEndLen, readByf, directoryEnd, and findSignatureInBlock were copied from the golang stdlib, specifically: -// - https://github.com/golang/go/blob/go1.16.4/src/archive/zip/struct.go -// - https://github.com/golang/go/blob/go1.16.4/src/archive/zip/reader.go -// findArchiveStartOffset is derived from the same stdlib utils, specifically the 
readDirectoryEnd function. - -const ( - directoryEndLen = 22 - directory64LocLen = 20 - directory64EndLen = 56 - directory64LocSignature = 0x07064b50 - directory64EndSignature = 0x06064b50 -) - -// ZipReadCloser is a drop-in replacement for zip.ReadCloser (from zip.OpenReader) that additionally considers zips -// that have bytes prefixed to the front of the archive (common with self-extracting jars). -type ZipReadCloser struct { - *zip.Reader - io.Closer -} - -// OpenZip provides a ZipReadCloser for the given filepath. -func OpenZip(filepath string) (*ZipReadCloser, error) { - f, err := os.Open(filepath) - if err != nil { - return nil, err - } - fi, err := f.Stat() - if err != nil { - f.Close() - return nil, err - } - - // some archives may have bytes prepended to the front of the archive, such as with self executing JARs. We first - // need to find the start of the archive and keep track of this offset. - offset, err := findArchiveStartOffset(f, fi.Size()) - if err != nil { - log.Debugf("cannot find beginning of zip archive=%q : %v", filepath, err) - return nil, err - } - - if _, err := f.Seek(0, io.SeekStart); err != nil { - return nil, fmt.Errorf("unable to seek to beginning of archive: %w", err) - } - - if offset > math.MaxInt64 { - return nil, fmt.Errorf("archive start offset too large: %v", offset) - } - offset64 := int64(offset) - - size := fi.Size() - offset64 - - r, err := zip.NewReader(io.NewSectionReader(f, offset64, size), size) - if err != nil { - log.Debugf("unable to open ZipReadCloser @ %q: %v", filepath, err) - return nil, err - } - - return &ZipReadCloser{ - Reader: r, - Closer: f, - }, nil -} - -type readBuf []byte - -func (b *readBuf) uint16() uint16 { - v := binary.LittleEndian.Uint16(*b) - *b = (*b)[2:] - return v -} - -func (b *readBuf) uint32() uint32 { - v := binary.LittleEndian.Uint32(*b) - *b = (*b)[4:] - return v -} - -func (b *readBuf) uint64() uint64 { - v := binary.LittleEndian.Uint64(*b) - *b = (*b)[8:] - return v -} - -type 
directoryEnd struct { - diskNbr uint32 // unused - dirDiskNbr uint32 // unused - dirRecordsThisDisk uint64 // unused - directoryRecords uint64 - directorySize uint64 - directoryOffset uint64 // relative to file -} - -// note: this is derived from readDirectoryEnd within the archive/zip package -func findArchiveStartOffset(r io.ReaderAt, size int64) (startOfArchive uint64, err error) { - // look for directoryEndSignature in the last 1k, then in the last 65k - var buf []byte - var directoryEndOffset int64 - for i, bLen := range []int64{1024, 65 * 1024} { - if bLen > size { - bLen = size - } - buf = make([]byte, int(bLen)) - if _, err := r.ReadAt(buf, size-bLen); err != nil && !errors.Is(err, io.EOF) { - return 0, err - } - if p := findSignatureInBlock(buf); p >= 0 { - buf = buf[p:] - directoryEndOffset = size - bLen + int64(p) - break - } - if i == 1 || bLen == size { - return 0, zip.ErrFormat - } - } - - if buf == nil { - // we were unable to find the directoryEndSignature block - return 0, zip.ErrFormat - } - - // read header into struct - b := readBuf(buf[4:]) // skip signature - d := &directoryEnd{ - diskNbr: uint32(b.uint16()), - dirDiskNbr: uint32(b.uint16()), - dirRecordsThisDisk: uint64(b.uint16()), - directoryRecords: uint64(b.uint16()), - directorySize: uint64(b.uint32()), - directoryOffset: uint64(b.uint32()), - } - // Calculate where the zip data actually begins - - // These values mean that the file can be a zip64 file - if d.directoryRecords == 0xffff || d.directorySize == 0xffff || d.directoryOffset == 0xffffffff { - p, err := findDirectory64End(r, directoryEndOffset) - if err == nil && p >= 0 { - directoryEndOffset = p - err = readDirectory64End(r, p, d) - } - if err != nil { - return 0, err - } - } - startOfArchive = uint64(directoryEndOffset) - d.directorySize - d.directoryOffset - - // Make sure directoryOffset points to somewhere in our file. 
- if d.directoryOffset >= uint64(size) { - return 0, zip.ErrFormat - } - return startOfArchive, nil -} - -// findDirectory64End tries to read the zip64 locator just before the -// directory end and returns the offset of the zip64 directory end if -// found. -func findDirectory64End(r io.ReaderAt, directoryEndOffset int64) (int64, error) { - locOffset := directoryEndOffset - directory64LocLen - if locOffset < 0 { - return -1, nil // no need to look for a header outside the file - } - buf := make([]byte, directory64LocLen) - if _, err := r.ReadAt(buf, locOffset); err != nil { - return -1, err - } - b := readBuf(buf) - if sig := b.uint32(); sig != directory64LocSignature { - return -1, nil - } - if b.uint32() != 0 { // number of the disk with the start of the zip64 end of central directory - return -1, nil // the file is not a valid zip64-file - } - p := b.uint64() // relative offset of the zip64 end of central directory record - if b.uint32() != 1 { // total number of disks - return -1, nil // the file is not a valid zip64-file - } - return int64(p), nil -} - -// readDirectory64End reads the zip64 directory end and updates the -// directory end with the zip64 directory end values. 
-func readDirectory64End(r io.ReaderAt, offset int64, d *directoryEnd) (err error) { - buf := make([]byte, directory64EndLen) - if _, err := r.ReadAt(buf, offset); err != nil { - return err - } - - b := readBuf(buf) - if sig := b.uint32(); sig != directory64EndSignature { - return errors.New("could not read directory64End") - } - - b = b[12:] // skip dir size, version and version needed (uint64 + 2x uint16) - d.diskNbr = b.uint32() // number of this disk - d.dirDiskNbr = b.uint32() // number of the disk with the start of the central directory - d.dirRecordsThisDisk = b.uint64() // total number of entries in the central directory on this disk - d.directoryRecords = b.uint64() // total number of entries in the central directory - d.directorySize = b.uint64() // size of the central directory - d.directoryOffset = b.uint64() // offset of start of central directory with respect to the starting disk number - - return nil -} - -func findSignatureInBlock(b []byte) int { - for i := len(b) - directoryEndLen; i >= 0; i-- { - // defined from directoryEndSignature - if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 { - // n is length of comment - n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8 - if n+directoryEndLen+i <= len(b) { - return i - } - } - } - return -1 -} diff --git a/internal/file/zip_read_closer_test.go b/internal/file/zip_read_closer_test.go deleted file mode 100644 index 349bfcc9bf0..00000000000 --- a/internal/file/zip_read_closer_test.go +++ /dev/null @@ -1,50 +0,0 @@ -//go:build !windows -// +build !windows - -package file - -import ( - "os" - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestFindArchiveStartOffset(t *testing.T) { - tests := []struct { - name string - archivePrep func(tb testing.TB) string - expected uint64 - }{ - { - name: "standard, non-nested zip", - archivePrep: prepZipSourceFixture, - expected: 0, - }, - { - name: "zip with prepended bytes", - archivePrep: 
prependZipSourceFixtureWithString(t, "junk at the beginning of the file..."), - expected: 36, - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - archivePath := test.archivePrep(t) - f, err := os.Open(archivePath) - if err != nil { - t.Fatalf("could not open archive %q: %+v", archivePath, err) - } - fi, err := os.Stat(f.Name()) - if err != nil { - t.Fatalf("unable to stat archive: %+v", err) - } - - actual, err := findArchiveStartOffset(f, fi.Size()) - if err != nil { - t.Fatalf("unable to find offset: %+v", err) - } - assert.Equal(t, test.expected, actual) - }) - } -} diff --git a/internal/task/unknowns_tasks.go b/internal/task/unknowns_tasks.go index 0b8959bd0dc..2f63ce28ebd 100644 --- a/internal/task/unknowns_tasks.go +++ b/internal/task/unknowns_tasks.go @@ -4,7 +4,8 @@ import ( "context" "strings" - "github.com/anchore/archiver/v3" + "github.com/mholt/archives" + "github.com/anchore/syft/internal/log" "github.com/anchore/syft/internal/sbomsync" "github.com/anchore/syft/syft/cataloging" @@ -57,9 +58,10 @@ func (c unknownsLabelerTask) finalize(resolver file.Resolver, s *sbom.SBOM) { } if c.IncludeUnexpandedArchives { + ctx := context.Background() for coords := range s.Artifacts.FileMetadata { - unarchiver, notArchiveErr := archiver.ByExtension(coords.RealPath) - if unarchiver != nil && notArchiveErr == nil && !hasPackageReference(coords) { + format, _, notArchiveErr := archives.Identify(ctx, coords.RealPath, nil) + if format != nil && notArchiveErr == nil && !hasPackageReference(coords) { s.Artifacts.Unknowns[coords] = append(s.Artifacts.Unknowns[coords], "archive not cataloged") } } diff --git a/syft/format/github/internal/model/model.go b/syft/format/github/internal/model/model.go index 7d67e87630c..e3baaa747ee 100644 --- a/syft/format/github/internal/model/model.go +++ b/syft/format/github/internal/model/model.go @@ -1,11 +1,13 @@ package model import ( + "context" "fmt" "strings" "time" - "github.com/anchore/archiver/v3" + 
"github.com/mholt/archives" + "github.com/anchore/packageurl-go" "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/pkg" @@ -150,8 +152,8 @@ func trimRelative(s string) string { // isArchive returns true if the path appears to be an archive func isArchive(path string) bool { - _, err := archiver.ByExtension(path) - return err == nil + format, _, err := archives.Identify(context.Background(), path, nil) + return err == nil && format != nil } func toDependencies(s *sbom.SBOM, p pkg.Package) (out []string) { diff --git a/syft/pkg/cataloger/java/archive_parser.go b/syft/pkg/cataloger/java/archive_parser.go index d06b6587288..4a9c2c5cc8e 100644 --- a/syft/pkg/cataloger/java/archive_parser.go +++ b/syft/pkg/cataloger/java/archive_parser.go @@ -96,11 +96,6 @@ func uniquePkgKey(groupID string, p *pkg.Package) string { // newJavaArchiveParser returns a new java archive parser object for the given archive. Can be configured to discover // and parse nested archives or ignore them. 
func newJavaArchiveParser(ctx context.Context, reader file.LocationReadCloser, detectNested bool, cfg ArchiveCatalogerConfig) (*archiveParser, func(), error) { - licenseScanner, err := licenses.ContextLicenseScanner(ctx) - if err != nil { - return nil, nil, fmt.Errorf("could not build license scanner for java archive parser: %w", err) - } - // fetch the last element of the virtual path virtualElements := strings.Split(reader.Path(), ":") currentFilepath := virtualElements[len(virtualElements)-1] @@ -110,7 +105,7 @@ func newJavaArchiveParser(ctx context.Context, reader file.LocationReadCloser, d return nil, cleanupFn, fmt.Errorf("unable to process java archive: %w", err) } - fileManifest, err := intFile.NewZipFileManifest(archivePath) + fileManifest, err := intFile.NewZipFileManifest(ctx, archivePath) if err != nil { return nil, cleanupFn, fmt.Errorf("unable to read files from java archive: %w", err) } @@ -228,7 +223,7 @@ func (j *archiveParser) discoverMainPackage(ctx context.Context) (*pkg.Package, } // fetch the manifest file - contents, err := intFile.ContentsFromZip(j.archivePath, manifestMatches...) + contents, err := intFile.ContentsFromZip(ctx, j.archivePath, manifestMatches...) 
if err != nil { return nil, fmt.Errorf("unable to extract java manifests (%s): %w", j.location, err) } @@ -387,8 +382,8 @@ func (j *archiveParser) discoverMainPackageFromPomInfo(ctx context.Context) (gro var pomProperties pkg.JavaPomProperties // Find the pom.properties/pom.xml if the names seem like a plausible match - properties, _ := pomPropertiesByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob)) - projects, _ := pomProjectByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob)) + properties, _ := pomPropertiesByParentPath(ctx, j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob)) + projects, _ := pomProjectByParentPath(ctx, j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob)) // map of all the artifacts in the pom properties, in order to chek exact match with the filename artifactsMap := make(map[string]bool) @@ -453,13 +448,13 @@ func (j *archiveParser) discoverPkgsFromAllMavenFiles(ctx context.Context, paren var pkgs []pkg.Package // pom.properties - properties, err := pomPropertiesByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob)) + properties, err := pomPropertiesByParentPath(ctx, j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob)) if err != nil { return nil, err } // pom.xml - projects, err := pomProjectByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob)) + projects, err := pomProjectByParentPath(ctx, j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob)) if err != nil { return nil, err } @@ -505,7 +500,7 @@ func (j *archiveParser) getLicenseFromFileInArchive(ctx context.Context) ([]pkg. } if len(licenseMatches) > 0 { - contents, err := intFile.ContentsFromZip(j.archivePath, licenseMatches...) + contents, err := intFile.ContentsFromZip(ctx, j.archivePath, licenseMatches...) 
if err != nil { return nil, fmt.Errorf("unable to extract java license (%s): %w", j.location, err) } @@ -533,7 +528,7 @@ func (j *archiveParser) discoverPkgsFromNestedArchives(ctx context.Context, pare // associating each discovered package to the given parent package. func discoverPkgsFromZip(ctx context.Context, location file.Location, archivePath, contentPath string, fileManifest intFile.ZipFileManifest, parentPkg *pkg.Package, cfg ArchiveCatalogerConfig) ([]pkg.Package, []artifact.Relationship, error) { // search and parse pom.properties files & fetch the contents - openers, err := intFile.ExtractFromZipToUniqueTempFile(archivePath, contentPath, fileManifest.GlobMatch(false, archiveFormatGlobs...)...) + openers, err := intFile.ExtractFromZipToUniqueTempFile(ctx, archivePath, contentPath, fileManifest.GlobMatch(false, archiveFormatGlobs...)...) if err != nil { return nil, nil, fmt.Errorf("unable to extract files from zip: %w", err) } @@ -597,8 +592,8 @@ func discoverPkgsFromOpener(ctx context.Context, location file.Location, pathWit return nestedPkgs, nestedRelationships, nil } -func pomPropertiesByParentPath(archivePath string, location file.Location, extractPaths []string) (map[string]pkg.JavaPomProperties, error) { - contentsOfMavenPropertiesFiles, err := intFile.ContentsFromZip(archivePath, extractPaths...) +func pomPropertiesByParentPath(ctx context.Context, archivePath string, location file.Location, extractPaths []string) (map[string]pkg.JavaPomProperties, error) { + contentsOfMavenPropertiesFiles, err := intFile.ContentsFromZip(ctx, archivePath, extractPaths...) 
if err != nil { return nil, fmt.Errorf("unable to extract maven files: %w", err) } @@ -626,8 +621,8 @@ func pomPropertiesByParentPath(archivePath string, location file.Location, extra return propertiesByParentPath, nil } -func pomProjectByParentPath(archivePath string, location file.Location, extractPaths []string) (map[string]*parsedPomProject, error) { - contentsOfMavenProjectFiles, err := intFile.ContentsFromZip(archivePath, extractPaths...) +func pomProjectByParentPath(ctx context.Context, archivePath string, location file.Location, extractPaths []string) (map[string]*parsedPomProject, error) { + contentsOfMavenProjectFiles, err := intFile.ContentsFromZip(ctx, archivePath, extractPaths...) if err != nil { return nil, fmt.Errorf("unable to extract maven files: %w", err) } diff --git a/syft/pkg/cataloger/java/archive_parser_test.go b/syft/pkg/cataloger/java/archive_parser_test.go index 55a4a0c543d..7d49577ff4d 100644 --- a/syft/pkg/cataloger/java/archive_parser_test.go +++ b/syft/pkg/cataloger/java/archive_parser_test.go @@ -72,8 +72,7 @@ func TestSearchMavenForLicenses(t *testing.T) { require.NoError(t, err) // setup parser - ap, cleanupFn, err := newJavaArchiveParser( - ctx, + ap, cleanupFn, err := newJavaArchiveParser(context.Background(), file.LocationReadCloser{ Location: file.NewLocation(fixture.Name()), ReadCloser: fixture, @@ -343,8 +342,7 @@ func TestParseJar(t *testing.T) { UseNetwork: false, UseMavenLocalRepository: false, } - parser, cleanupFn, err := newJavaArchiveParser( - ctx, + parser, cleanupFn, err := newJavaArchiveParser(context.Background(), file.LocationReadCloser{ Location: file.NewLocation(fixture.Name()), ReadCloser: fixture, @@ -1393,8 +1391,7 @@ func Test_deterministicMatchingPomProperties(t *testing.T) { fixture, err := os.Open(fixturePath) require.NoError(t, err) - parser, cleanupFn, err := newJavaArchiveParser( - ctx, + parser, cleanupFn, err := newJavaArchiveParser(context.Background(), file.LocationReadCloser{ Location: 
file.NewLocation(fixture.Name()), ReadCloser: fixture, @@ -1523,3 +1520,25 @@ func Test_corruptJarArchive(t *testing.T) { WithError(). TestParser(t, ap.parseJavaArchive) } + +func Test_jarPomPropertyResolutionDoesNotPanic(t *testing.T) { + jarName := generateJavaMetadataJarFixture(t, "commons-lang3-3.12.0", "jar") + fixture, err := os.Open(jarName) + require.NoError(t, err) + + ctx := context.TODO() + // setup parser + ap, cleanupFn, err := newJavaArchiveParser(context.Background(), + file.LocationReadCloser{ + Location: file.NewLocation(fixture.Name()), + ReadCloser: fixture, + }, false, ArchiveCatalogerConfig{ + UseMavenLocalRepository: true, + MavenLocalRepositoryDir: "internal/maven/test-fixtures/maven-repo", + }) + defer cleanupFn() + require.NoError(t, err) + + _, _, err = ap.parse(ctx, nil) + require.NoError(t, err) +} diff --git a/syft/pkg/cataloger/java/tar_wrapped_archive_parser.go b/syft/pkg/cataloger/java/tar_wrapped_archive_parser.go index 5af4f0b3f9c..4c4edc595e3 100644 --- a/syft/pkg/cataloger/java/tar_wrapped_archive_parser.go +++ b/syft/pkg/cataloger/java/tar_wrapped_archive_parser.go @@ -70,7 +70,7 @@ func (gtp genericTarWrappedJavaArchiveParser) parseTarWrappedJavaArchive(ctx con } func discoverPkgsFromTar(ctx context.Context, location file.Location, archivePath, contentPath string, cfg ArchiveCatalogerConfig) ([]pkg.Package, []artifact.Relationship, error) { - openers, err := intFile.ExtractGlobsFromTarToUniqueTempFile(archivePath, contentPath, archiveFormatGlobs...) + openers, err := intFile.ExtractGlobsFromTarToUniqueTempFile(ctx, archivePath, contentPath, archiveFormatGlobs...) 
if err != nil { return nil, nil, fmt.Errorf("unable to extract files from tar: %w", err) } diff --git a/syft/pkg/cataloger/java/zip_wrapped_archive_parser.go b/syft/pkg/cataloger/java/zip_wrapped_archive_parser.go index 3dd1d25247d..e515f4f903c 100644 --- a/syft/pkg/cataloger/java/zip_wrapped_archive_parser.go +++ b/syft/pkg/cataloger/java/zip_wrapped_archive_parser.go @@ -41,7 +41,7 @@ func (gzp genericZipWrappedJavaArchiveParser) parseZipWrappedJavaArchive(ctx con // functions support zips with shell scripts prepended to the file. Specifically, the helpers use the central // header at the end of the file to determine where the beginning of the zip payload is (unlike the standard lib // or archiver). - fileManifest, err := intFile.NewZipFileManifest(archivePath) + fileManifest, err := intFile.NewZipFileManifest(ctx, archivePath) if err != nil { return nil, nil, fmt.Errorf("unable to read files from java archive: %w", err) } diff --git a/syft/source/filesource/file_source.go b/syft/source/filesource/file_source.go index 09a4422ced5..fcd538d4200 100644 --- a/syft/source/filesource/file_source.go +++ b/syft/source/filesource/file_source.go @@ -4,14 +4,15 @@ import ( "context" "crypto" "fmt" + "io" "os" "path" "path/filepath" "sync" + "github.com/mholt/archives" "github.com/opencontainers/go-digest" - "github.com/anchore/archiver/v3" stereoFile "github.com/anchore/stereoscope/pkg/file" intFile "github.com/anchore/syft/internal/file" "github.com/anchore/syft/internal/log" @@ -223,15 +224,8 @@ func fileAnalysisPath(path string, skipExtractArchive bool) (string, func() erro // if the given file is an archive (as indicated by the file extension and not MIME type) then unarchive it and // use the contents as the source. Note: this does NOT recursively unarchive contents, only the given path is // unarchived. 
- envelopedUnarchiver, err := archiver.ByExtension(path) - if unarchiver, ok := envelopedUnarchiver.(archiver.Unarchiver); err == nil && ok { - if tar, ok := unarchiver.(*archiver.Tar); ok { - // when tar files are extracted, if there are multiple entries at the same - // location, the last entry wins - // NOTE: this currently does not display any messages if an overwrite happens - tar.OverwriteExisting = true - } - + envelopedUnarchiver, _, err := archives.Identify(context.Background(), path, nil) + if unarchiver, ok := envelopedUnarchiver.(archives.Extractor); err == nil && ok { analysisPath, cleanupFn, err = unarchiveToTmp(path, unarchiver) if err != nil { return "", nil, fmt.Errorf("unable to unarchive source file: %w", err) @@ -256,15 +250,58 @@ func digestOfFileContents(path string) string { return di.String() } -func unarchiveToTmp(path string, unarchiver archiver.Unarchiver) (string, func() error, error) { +func unarchiveToTmp(path string, unarchiver archives.Extractor) (string, func() error, error) { + var cleanupFn = func() error { return nil } + archive, err := os.Open(path) + if err != nil { + return "", cleanupFn, fmt.Errorf("unable to open archive: %v", err) + } + defer archive.Close() + tempDir, err := os.MkdirTemp("", "syft-archive-contents-") if err != nil { - return "", func() error { return nil }, fmt.Errorf("unable to create tempdir for archive processing: %w", err) + return "", cleanupFn, fmt.Errorf("unable to create tempdir for archive processing: %w", err) } - cleanupFn := func() error { - return os.RemoveAll(tempDir) + visitor := func(_ context.Context, file archives.FileInfo) error { + // Protect against symlink attacks by ensuring path doesn't escape tempDir + destPath, err := intFile.SafeJoin(tempDir, file.NameInArchive) + if err != nil { + return err + } + + if file.IsDir() { + return os.MkdirAll(destPath, file.Mode()) + } + + if err = os.MkdirAll(filepath.Dir(destPath), os.ModeDir|0755); err != nil { + return fmt.Errorf("failed to 
create parent directory: %w", err) + } + + rc, err := file.Open() + if err != nil { + return fmt.Errorf("failed to open file in archive: %w", err) + } + defer rc.Close() + + destFile, err := os.Create(destPath) + if err != nil { + return fmt.Errorf("failed to create file in destination: %w", err) + } + defer destFile.Close() + + if err := destFile.Chmod(file.Mode()); err != nil { + return fmt.Errorf("failed to change mode of destination file: %w", err) + } + + if _, err := io.Copy(destFile, rc); err != nil { + return fmt.Errorf("failed to copy file contents: %w", err) + } + + return nil } - return tempDir, cleanupFn, unarchiver.Unarchive(path, tempDir) + return tempDir, func() error { + return os.RemoveAll(tempDir) + }, unarchiver.Extract(context.Background(), archive, visitor) } From 3fb3f3503f7fda6c4d35206d39d1a55883980e25 Mon Sep 17 00:00:00 2001 From: qingliu Date: Wed, 24 Dec 2025 22:12:42 +0800 Subject: [PATCH 2/2] fix(java): initialize license scanner in archive parser This commit addresses a potential bug where the license scanner was not properly initialized before use in the Java archive parser, which could lead to a nil pointer dereference. 
Additionally, updates test fixtures and assertions to reflect: - Updated package versions in Rocky Linux (curl-minimal, httpd) - Refactored deduplication tests to index by package name for better resilience to version changes - Added comprehensive test documentation --- .../integration/package_deduplication_test.go | 77 ++++++++++--------- .../image-vertical-package-dups/Dockerfile | 4 +- go.mod | 2 +- go.sum | 4 +- syft/pkg/cataloger/java/archive_parser.go | 5 ++ 5 files changed, 49 insertions(+), 43 deletions(-) diff --git a/cmd/syft/internal/test/integration/package_deduplication_test.go b/cmd/syft/internal/test/integration/package_deduplication_test.go index f2586a91129..82783400c68 100644 --- a/cmd/syft/internal/test/integration/package_deduplication_test.go +++ b/cmd/syft/internal/test/integration/package_deduplication_test.go @@ -1,7 +1,6 @@ package integration import ( - "fmt" "testing" "github.com/stretchr/testify/assert" @@ -11,47 +10,52 @@ import ( ) func TestPackageDeduplication(t *testing.T) { + // this test verifies that package deduplication works correctly across layers. + // The test fixture installs/upgrades packages in multiple stages, creating + // duplicate RPM DB entries across layers. Without deduplication, we'd see ~600 packages. + // + // Note: we index by package name (not name-version) to be resilient to Rocky Linux + // repo updates. Location counts are summed across all versions of each package. 
tests := []struct { scope source.Scope packageCount int - instanceCount map[string]int - locationCount map[string]int + instanceCount map[string]int // how many distinct package instances (by name) + locationCount map[string]int // total locations across ALL versions of each package }{ { scope: source.AllLayersScope, - packageCount: 175, // without deduplication this would be ~600 + packageCount: 176, // without deduplication this would be ~600 instanceCount: map[string]int{ "basesystem": 1, "wget": 1, - "curl-minimal": 2, // upgraded in the image + "curl-minimal": 2, // base + upgraded (2 different versions) "vsftpd": 1, - "httpd": 1, // rpm, - we exclude binary + "httpd": 1, }, locationCount: map[string]int{ - "basesystem-11-13.el9": 5, // in all layers - "curl-minimal-7.76.1-26.el9_3.2.0.1": 2, // base + wget layer - "curl-minimal-7.76.1-31.el9_6.1": 3, // curl upgrade layer + all above layers - "wget-1.21.1-8.el9_4": 4, // wget + all above layers - "vsftpd-3.0.5-6.el9": 2, // vsftpd + all above layers - "httpd-2.4.62-4.el9_6.4": 1, // last layer + "basesystem": 5, // in all layers + "curl-minimal": 5, // total across both versions (2 + 3) + "wget": 4, // wget + all above layers + "vsftpd": 2, // vsftpd + all above layers + "httpd": 1, // last layer }, }, { scope: source.SquashedScope, - packageCount: 169, + packageCount: 170, instanceCount: map[string]int{ "basesystem": 1, "wget": 1, - "curl-minimal": 1, // upgraded, but the most recent + "curl-minimal": 1, // deduped to latest "vsftpd": 1, - "httpd": 1, // rpm, binary is now excluded by overlap + "httpd": 1, }, locationCount: map[string]int{ - "basesystem-11-13.el9": 1, - "curl-minimal-7.76.1-31.el9_6.1": 1, // upgrade - "wget-1.21.1-8.el9_4": 1, - "vsftpd-3.0.5-6.el9": 1, - "httpd-2.4.62-4.el9_6.4": 1, + "basesystem": 1, + "curl-minimal": 1, + "wget": 1, + "vsftpd": 1, + "httpd": 1, }, }, } @@ -59,35 +63,32 @@ func TestPackageDeduplication(t *testing.T) { for _, tt := range tests { t.Run(string(tt.scope), 
func(t *testing.T) { sbom, _ := catalogFixtureImage(t, "image-vertical-package-dups", tt.scope) + + // verify binary packages have names for _, p := range sbom.Artifacts.Packages.Sorted() { if p.Type == pkg.BinaryPkg { assert.NotEmpty(t, p.Name) } } + // verify exact package count assert.Equal(t, tt.packageCount, sbom.Artifacts.Packages.PackageCount()) - for name, expectedInstanceCount := range tt.instanceCount { - pkgs := sbom.Artifacts.Packages.PackagesByName(name) - // with multiple packages with the same name, something is wrong (or this is the wrong fixture) - if assert.Len(t, pkgs, expectedInstanceCount, "unexpected package count for %s", name) { - for _, p := range pkgs { - nameVersion := fmt.Sprintf("%s-%s", name, p.Version) - expectedLocationCount, ok := tt.locationCount[nameVersion] - if !ok { - t.Errorf("missing name-version: %s", nameVersion) - continue - } - - // we should see merged locations (assumption, there was 1 location for each package) - assert.Len(t, p.Locations.ToSlice(), expectedLocationCount, "unexpected location count for %s", nameVersion) + // verify instance count by package name + for name, expectedCount := range tt.instanceCount { + pkgs := sbom.Artifacts.Packages.PackagesByName(name) + assert.Len(t, pkgs, expectedCount, "unexpected instance count for %s", name) + } - // all paths should match - assert.Len(t, p.Locations.CoordinateSet().Paths(), 1, "unexpected location count for %s", nameVersion) - } + // verify total location count across all versions of each package + for name, expectedLocCount := range tt.locationCount { + pkgs := sbom.Artifacts.Packages.PackagesByName(name) + totalLocations := 0 + for _, p := range pkgs { + totalLocations += len(p.Locations.ToSlice()) } + assert.Equal(t, expectedLocCount, totalLocations, "unexpected total location count for %s", name) } - }) } } diff --git a/cmd/syft/internal/test/integration/test-fixtures/image-vertical-package-dups/Dockerfile 
b/cmd/syft/internal/test/integration/test-fixtures/image-vertical-package-dups/Dockerfile index 6fb68e98e78..a07c6e845ed 100644 --- a/cmd/syft/internal/test/integration/test-fixtures/image-vertical-package-dups/Dockerfile +++ b/cmd/syft/internal/test/integration/test-fixtures/image-vertical-package-dups/Dockerfile @@ -10,13 +10,13 @@ FROM base AS stage1 RUN dnf install -y wget-1.21.1-8.el9_4 FROM stage1 AS stage2 -RUN dnf update -y curl-minimal-7.76.1-31.el9_6.1 +RUN dnf update -y curl-minimal-7.76.1-34.el9 FROM stage2 AS stage3 RUN dnf install -y vsftpd-3.0.5-6.el9 FROM stage3 AS stage4 -RUN dnf install -y httpd-2.4.62-4.el9_6.4 +RUN dnf install -y httpd-2.4.62-7.el9 FROM scratch diff --git a/go.mod b/go.mod index d674cc3e965..a0207fa2b69 100644 --- a/go.mod +++ b/go.mod @@ -284,7 +284,7 @@ require ( ) require ( - github.com/nwaples/rardecode/v2 v2.2.0 // indirect + github.com/nwaples/rardecode/v2 v2.2.1 // indirect github.com/olekukonko/cat v0.0.0-20250911104152-50322a0618f6 // indirect github.com/olekukonko/errors v1.1.0 // indirect github.com/olekukonko/ll v0.1.2 // indirect diff --git a/go.sum b/go.sum index 8bbd58cdfbe..08a4141efdf 100644 --- a/go.sum +++ b/go.sum @@ -1339,8 +1339,8 @@ github.com/nix-community/go-nix v0.0.0-20250101154619-4bdde671e0a1 h1:kpt9ZfKcm+ github.com/nix-community/go-nix v0.0.0-20250101154619-4bdde671e0a1/go.mod h1:qgCw4bBKZX8qMgGeEZzGFVT3notl42dBjNqO2jut0M0= github.com/nsf/jsondiff v0.0.0-20210926074059-1e845ec5d249 h1:NHrXEjTNQY7P0Zfx1aMrNhpgxHmow66XQtm0aQLY0AE= github.com/nsf/jsondiff v0.0.0-20210926074059-1e845ec5d249/go.mod h1:mpRZBD8SJ55OIICQ3iWH0Yz3cjzA61JdqMLoWXeB2+8= -github.com/nwaples/rardecode/v2 v2.2.0 h1:4ufPGHiNe1rYJxYfehALLjup4Ls3ck42CWwjKiOqu0A= -github.com/nwaples/rardecode/v2 v2.2.0/go.mod h1:7uz379lSxPe6j9nvzxUZ+n7mnJNgjsRNb6IbvGVHRmw= +github.com/nwaples/rardecode/v2 v2.2.1 h1:DgHK/O/fkTQEKBJxBMC5d9IU8IgauifbpG78+rZJMnI= +github.com/nwaples/rardecode/v2 v2.2.1/go.mod 
h1:7uz379lSxPe6j9nvzxUZ+n7mnJNgjsRNb6IbvGVHRmw= github.com/olekukonko/cat v0.0.0-20250911104152-50322a0618f6 h1:zrbMGy9YXpIeTnGj4EljqMiZsIcE09mmF8XsD5AYOJc= github.com/olekukonko/cat v0.0.0-20250911104152-50322a0618f6/go.mod h1:rEKTHC9roVVicUIfZK7DYrdIoM0EOr8mK1Hj5s3JjH0= github.com/olekukonko/errors v1.1.0 h1:RNuGIh15QdDenh+hNvKrJkmxxjV4hcS50Db478Ou5sM= diff --git a/syft/pkg/cataloger/java/archive_parser.go b/syft/pkg/cataloger/java/archive_parser.go index 4a9c2c5cc8e..7a04fa9ec2e 100644 --- a/syft/pkg/cataloger/java/archive_parser.go +++ b/syft/pkg/cataloger/java/archive_parser.go @@ -96,6 +96,11 @@ func uniquePkgKey(groupID string, p *pkg.Package) string { // newJavaArchiveParser returns a new java archive parser object for the given archive. Can be configured to discover // and parse nested archives or ignore them. func newJavaArchiveParser(ctx context.Context, reader file.LocationReadCloser, detectNested bool, cfg ArchiveCatalogerConfig) (*archiveParser, func(), error) { + licenseScanner, err := licenses.ContextLicenseScanner(ctx) + if err != nil { + return nil, nil, fmt.Errorf("could not build license scanner for java archive parser: %w", err) + } + // fetch the last element of the virtual path virtualElements := strings.Split(reader.Path(), ":") currentFilepath := virtualElements[len(virtualElements)-1]