Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,8 @@ syft <image> -o <format>
Where the `formats` available are:
- `syft-json`: Use this to get as much information out of Syft as possible!
- `syft-text`: A row-oriented, human-and-machine-friendly output.
- `cyclonedx-xml`: A XML report conforming to the [CycloneDX 1.6 specification](https://cyclonedx.org/specification/overview/).
- `cyclonedx-xml@1.5`: A XML report conforming to the [CycloneDX 1.5 specification](https://cyclonedx.org/specification/overview/).
- `cyclonedx-xml`: An XML report conforming to the [CycloneDX 1.6 specification](https://cyclonedx.org/specification/overview/).
- `cyclonedx-xml@1.5`: An XML report conforming to the [CycloneDX 1.5 specification](https://cyclonedx.org/specification/overview/).
- `cyclonedx-json`: A JSON report conforming to the [CycloneDX 1.6 specification](https://cyclonedx.org/specification/overview/).
- `cyclonedx-json@1.5`: A JSON report conforming to the [CycloneDX 1.5 specification](https://cyclonedx.org/specification/overview/).
- `spdx-tag-value`: A tag-value formatted report conforming to the [SPDX 2.3 specification](https://spdx.github.io/spdx-spec/v2.3/).
Expand Down
77 changes: 39 additions & 38 deletions cmd/syft/internal/test/integration/package_deduplication_test.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package integration

import (
"fmt"
"testing"

"github.com/stretchr/testify/assert"
Expand All @@ -11,83 +10,85 @@ import (
)

func TestPackageDeduplication(t *testing.T) {
// this test verifies that package deduplication works correctly across layers.
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这个文件是从 上游 main 分支检出的代码。

// The test fixture installs/upgrades packages in multiple stages, creating
// duplicate RPM DB entries across layers. Without deduplication, we'd see ~600 packages.
//
// Note: we index by package name (not name-version) to be resilient to Rocky Linux
// repo updates. Location counts are summed across all versions of each package.
tests := []struct {
scope source.Scope
packageCount int
instanceCount map[string]int
locationCount map[string]int
instanceCount map[string]int // how many distinct package instances (by name)
locationCount map[string]int // total locations across ALL versions of each package
}{
{
scope: source.AllLayersScope,
packageCount: 175, // without deduplication this would be ~600
packageCount: 176, // without deduplication this would be ~600
instanceCount: map[string]int{
"basesystem": 1,
"wget": 1,
"curl-minimal": 2, // upgraded in the image
"curl-minimal": 2, // base + upgraded (2 different versions)
"vsftpd": 1,
"httpd": 1, // rpm, - we exclude binary
"httpd": 1,
},
locationCount: map[string]int{
"basesystem-11-13.el9": 5, // in all layers
"curl-minimal-7.76.1-26.el9_3.2.0.1": 2, // base + wget layer
"curl-minimal-7.76.1-31.el9_6.1": 3, // curl upgrade layer + all above layers
"wget-1.21.1-8.el9_4": 4, // wget + all above layers
"vsftpd-3.0.5-6.el9": 2, // vsftpd + all above layers
"httpd-2.4.62-4.el9_6.4": 1, // last layer
"basesystem": 5, // in all layers
"curl-minimal": 5, // total across both versions (2 + 3)
"wget": 4, // wget + all above layers
"vsftpd": 2, // vsftpd + all above layers
"httpd": 1, // last layer
},
},
{
scope: source.SquashedScope,
packageCount: 169,
packageCount: 170,
instanceCount: map[string]int{
"basesystem": 1,
"wget": 1,
"curl-minimal": 1, // upgraded, but the most recent
"curl-minimal": 1, // deduped to latest
"vsftpd": 1,
"httpd": 1, // rpm, binary is now excluded by overlap
"httpd": 1,
},
locationCount: map[string]int{
"basesystem-11-13.el9": 1,
"curl-minimal-7.76.1-31.el9_6.1": 1, // upgrade
"wget-1.21.1-8.el9_4": 1,
"vsftpd-3.0.5-6.el9": 1,
"httpd-2.4.62-4.el9_6.4": 1,
"basesystem": 1,
"curl-minimal": 1,
"wget": 1,
"vsftpd": 1,
"httpd": 1,
},
},
}

for _, tt := range tests {
t.Run(string(tt.scope), func(t *testing.T) {
sbom, _ := catalogFixtureImage(t, "image-vertical-package-dups", tt.scope)

// verify binary packages have names
for _, p := range sbom.Artifacts.Packages.Sorted() {
if p.Type == pkg.BinaryPkg {
assert.NotEmpty(t, p.Name)
}
}

// verify exact package count
assert.Equal(t, tt.packageCount, sbom.Artifacts.Packages.PackageCount())
for name, expectedInstanceCount := range tt.instanceCount {
pkgs := sbom.Artifacts.Packages.PackagesByName(name)

// with multiple packages with the same name, something is wrong (or this is the wrong fixture)
if assert.Len(t, pkgs, expectedInstanceCount, "unexpected package count for %s", name) {
for _, p := range pkgs {
nameVersion := fmt.Sprintf("%s-%s", name, p.Version)
expectedLocationCount, ok := tt.locationCount[nameVersion]
if !ok {
t.Errorf("missing name-version: %s", nameVersion)
continue
}

// we should see merged locations (assumption, there was 1 location for each package)
assert.Len(t, p.Locations.ToSlice(), expectedLocationCount, "unexpected location count for %s", nameVersion)
// verify instance count by package name
for name, expectedCount := range tt.instanceCount {
pkgs := sbom.Artifacts.Packages.PackagesByName(name)
assert.Len(t, pkgs, expectedCount, "unexpected instance count for %s", name)
}

// all paths should match
assert.Len(t, p.Locations.CoordinateSet().Paths(), 1, "unexpected location count for %s", nameVersion)
}
// verify total location count across all versions of each package
for name, expectedLocCount := range tt.locationCount {
pkgs := sbom.Artifacts.Packages.PackagesByName(name)
totalLocations := 0
for _, p := range pkgs {
totalLocations += len(p.Locations.ToSlice())
}
assert.Equal(t, expectedLocCount, totalLocations, "unexpected total location count for %s", name)
}

})
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@ FROM base AS stage1
RUN dnf install -y wget-1.21.1-8.el9_4

FROM stage1 AS stage2
RUN dnf update -y curl-minimal-7.76.1-31.el9_6.1
RUN dnf update -y curl-minimal-7.76.1-34.el9
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这个文件是 AI 帮忙修改的,原来的版本不存在了。


FROM stage2 AS stage3
RUN dnf install -y vsftpd-3.0.5-6.el9

FROM stage3 AS stage4
RUN dnf install -y httpd-2.4.62-4.el9_6.4
RUN dnf install -y httpd-2.4.62-7.el9

FROM scratch

Expand Down
17 changes: 9 additions & 8 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ require (
github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d
github.com/acobaugh/osrelease v0.1.0
github.com/adrg/xdg v0.5.3
github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51
github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9
github.com/anchore/clio v0.0.0-20250319180342-2cfe4b0cb716
github.com/anchore/fangs v0.0.0-20250319222917-446a1e748ec2
Expand Down Expand Up @@ -156,7 +155,7 @@ require (
github.com/dsnet/compress v0.0.2-0.20230904184137-39efe44ab707 // indirect
github.com/emirpasic/gods v1.18.1 // indirect
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect
github.com/fatih/color v1.17.0 // indirect
github.com/fatih/color v1.18.0 // indirect
github.com/felixge/fgprof v0.9.5 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/fsnotify/fsnotify v1.8.0 // indirect
Expand All @@ -170,7 +169,6 @@ require (
github.com/goccy/go-yaml v1.18.0
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect
github.com/golang/snappy v0.0.4 // indirect
github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e // indirect
github.com/google/s2a-go v0.1.8 // indirect
github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect
Expand All @@ -192,7 +190,7 @@ require (
github.com/logrusorgru/aurora v2.0.3+incompatible // indirect
github.com/lucasb-eyer/go-colorful v1.2.0 // indirect
github.com/maruel/natural v1.1.1 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-colorable v0.1.14 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/mattn/go-localereader v0.0.2-0.20220822084749-2491eb6c1c75 // indirect
github.com/mattn/go-runewidth v0.0.16 // indirect
Expand All @@ -213,10 +211,6 @@ require (
github.com/muesli/cancelreader v0.2.2 // indirect
github.com/muesli/termenv v0.16.0 // indirect
github.com/ncruces/go-strftime v0.1.9 // indirect
github.com/nwaples/rardecode v1.1.3 // indirect
github.com/nwaples/rardecode/v2 v2.2.1 // indirect
github.com/olekukonko/errors v0.0.0-20250405072817-4e6d85265da6 // indirect
github.com/olekukonko/ll v0.0.8 // indirect
github.com/opencontainers/image-spec v1.1.1 // indirect
github.com/opencontainers/runtime-spec v1.1.0 // indirect
github.com/opencontainers/selinux v1.13.1 // indirect
Expand Down Expand Up @@ -289,6 +283,13 @@ require (
modernc.org/memory v1.11.0 // indirect
)

require (
github.com/nwaples/rardecode/v2 v2.2.1 // indirect
github.com/olekukonko/cat v0.0.0-20250911104152-50322a0618f6 // indirect
github.com/olekukonko/errors v1.1.0 // indirect
github.com/olekukonko/ll v0.1.2 // indirect
)

retract (
v1.25.0 // published with a replace directive (confusing for API users)
v0.53.2
Expand Down
23 changes: 10 additions & 13 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -664,8 +664,6 @@ github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuy
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51 h1:yhk+P8lF3ZiROjmaVRao9WGTRo4b/wYjoKEiAHWrKwc=
github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51/go.mod h1:nwuGSd7aZp0rtYt79YggCGafz1RYsclE7pi3fhLwvuw=
github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9 h1:p0ZIe0htYOX284Y4axJaGBvXHU0VCCzLN5Wf5XbKStU=
github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9/go.mod h1:3ZsFB9tzW3vl4gEiUeuSOMDnwroWxIxJelOOHUp8dSw=
github.com/anchore/clio v0.0.0-20250319180342-2cfe4b0cb716 h1:2sIdYJlQESEnyk3Y0WD2vXWW5eD2iMz9Ev8fj1Z8LNA=
Expand Down Expand Up @@ -913,8 +911,8 @@ github.com/facebookincubator/nvdtools v0.1.5/go.mod h1:Kh55SAWnjckS96TBSrXI99KrE
github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU=
github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk=
github.com/fatih/color v1.17.0 h1:GlRw1BRJxkpqUCBKzKOw098ed57fEsKeNjpTe3cSjK4=
github.com/fatih/color v1.17.0/go.mod h1:YZ7TlrGPkiz6ku9fK3TLD/pl3CpsiFyu8N92HLgmosI=
github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM=
github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU=
github.com/fatih/set v0.2.1 h1:nn2CaJyknWE/6txyUDGwysr3G5QC6xWB/PtVjPBbeaA=
github.com/fatih/set v0.2.1/go.mod h1:+RKtMCH+favT2+3YecHGxcc0b4KyVWA1QWWJUs4E0CI=
github.com/felixge/fgprof v0.9.3/go.mod h1:RdbpDgzqYVh/T9fPELJyV7EYJuHB55UTEULNun8eiPw=
Expand Down Expand Up @@ -1033,7 +1031,6 @@ github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiu
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
Expand Down Expand Up @@ -1260,8 +1257,8 @@ github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVc
github.com/mattn/go-colorable v0.1.6/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
github.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4=
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE=
github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8=
github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4=
github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
github.com/mattn/go-isatty v0.0.10/go.mod h1:qgIWMr58cqv1PHHyhnkY9lrL7etaEgOFcMEpPG5Rm84=
Expand Down Expand Up @@ -1342,14 +1339,14 @@ github.com/nix-community/go-nix v0.0.0-20250101154619-4bdde671e0a1 h1:kpt9ZfKcm+
github.com/nix-community/go-nix v0.0.0-20250101154619-4bdde671e0a1/go.mod h1:qgCw4bBKZX8qMgGeEZzGFVT3notl42dBjNqO2jut0M0=
github.com/nsf/jsondiff v0.0.0-20210926074059-1e845ec5d249 h1:NHrXEjTNQY7P0Zfx1aMrNhpgxHmow66XQtm0aQLY0AE=
github.com/nsf/jsondiff v0.0.0-20210926074059-1e845ec5d249/go.mod h1:mpRZBD8SJ55OIICQ3iWH0Yz3cjzA61JdqMLoWXeB2+8=
github.com/nwaples/rardecode v1.1.3 h1:cWCaZwfM5H7nAD6PyEdcVnczzV8i/JtotnyW/dD9lEc=
github.com/nwaples/rardecode v1.1.3/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0=
github.com/nwaples/rardecode/v2 v2.2.1 h1:DgHK/O/fkTQEKBJxBMC5d9IU8IgauifbpG78+rZJMnI=
github.com/nwaples/rardecode/v2 v2.2.1/go.mod h1:7uz379lSxPe6j9nvzxUZ+n7mnJNgjsRNb6IbvGVHRmw=
github.com/olekukonko/errors v0.0.0-20250405072817-4e6d85265da6 h1:r3FaAI0NZK3hSmtTDrBVREhKULp8oUeqLT5Eyl2mSPo=
github.com/olekukonko/errors v0.0.0-20250405072817-4e6d85265da6/go.mod h1:ppzxA5jBKcO1vIpCXQ9ZqgDh8iwODz6OXIGKU8r5m4Y=
github.com/olekukonko/ll v0.0.8 h1:sbGZ1Fx4QxJXEqL/6IG8GEFnYojUSQ45dJVwN2FH2fc=
github.com/olekukonko/ll v0.0.8/go.mod h1:En+sEW0JNETl26+K8eZ6/W4UQ7CYSrrgg/EdIYT2H8g=
github.com/olekukonko/cat v0.0.0-20250911104152-50322a0618f6 h1:zrbMGy9YXpIeTnGj4EljqMiZsIcE09mmF8XsD5AYOJc=
github.com/olekukonko/cat v0.0.0-20250911104152-50322a0618f6/go.mod h1:rEKTHC9roVVicUIfZK7DYrdIoM0EOr8mK1Hj5s3JjH0=
github.com/olekukonko/errors v1.1.0 h1:RNuGIh15QdDenh+hNvKrJkmxxjV4hcS50Db478Ou5sM=
github.com/olekukonko/errors v1.1.0/go.mod h1:ppzxA5jBKcO1vIpCXQ9ZqgDh8iwODz6OXIGKU8r5m4Y=
github.com/olekukonko/ll v0.1.2 h1:lkg/k/9mlsy0SxO5aC+WEpbdT5K83ddnNhAepz7TQc0=
github.com/olekukonko/ll v0.1.2/go.mod h1:b52bVQRRPObe+yyBl0TxNfhesL0nedD4Cht0/zx55Ew=
github.com/olekukonko/tablewriter v1.0.7 h1:HCC2e3MM+2g72M81ZcJU11uciw6z/p82aEnm4/ySDGw=
github.com/olekukonko/tablewriter v1.0.7/go.mod h1:H428M+HzoUXC6JU2Abj9IT9ooRmdq9CxuDmKMtrOCMs=
github.com/onsi/gomega v1.34.1 h1:EUMJIKUjM8sKjYbtxQI9A4z2o+rruxnzNvpknOXie6k=
Expand Down
41 changes: 34 additions & 7 deletions internal/file/tar_file_traversal.go
Original file line number Diff line number Diff line change
@@ -1,27 +1,48 @@
package file

import (
"context"
"fmt"
"os"
"path/filepath"

"github.com/bmatcuk/doublestar/v4"
"github.com/mholt/archives"

"github.com/anchore/archiver/v3"
"github.com/anchore/syft/internal"
)

// TraverseFilesInTar enumerates the paths stored within the tar archive at archivePath, handing each
// entry to visitor (visitor pattern).
//
// The archive format is identified from archivePath (no stream is supplied to archives.Identify). An
// error is returned when the file cannot be opened, when the format cannot be identified, or when the
// identified format does not implement archives.Extractor; otherwise the result of Extract is returned.
func TraverseFilesInTar(ctx context.Context, archivePath string, visitor archives.FileHandler) error {
	archiveFile, openErr := os.Open(archivePath)
	if openErr != nil {
		return fmt.Errorf("unable to open tar archive (%s): %w", archivePath, openErr)
	}
	// the file handle must stay open for the duration of the extraction below
	defer internal.CloseAndLogError(archiveFile, archivePath)

	detected, _, identifyErr := archives.Identify(ctx, archivePath, nil)
	if identifyErr != nil {
		return fmt.Errorf("failed to identify tar compression format: %w", identifyErr)
	}

	// only formats that are able to unpack their contents can be traversed
	if unpacker, canExtract := detected.(archives.Extractor); canExtract {
		return unpacker.Extract(ctx, archiveFile, visitor)
	}

	return fmt.Errorf("file format does not support extraction: %s", archivePath)
}

// ExtractGlobsFromTarToUniqueTempFile extracts paths matching the given globs within the given archive to a temporary directory, returning file openers for each file extracted.
func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...string) (map[string]Opener, error) {
func ExtractGlobsFromTarToUniqueTempFile(ctx context.Context, archivePath, dir string, globs ...string) (map[string]Opener, error) {
results := make(map[string]Opener)

// don't allow for full traversal, only select traversal from given paths
if len(globs) == 0 {
return results, nil
}

visitor := func(file archiver.File) error {
defer file.Close()

visitor := func(_ context.Context, file archives.FileInfo) error {
// ignore directories
if file.IsDir() {
return nil
Expand All @@ -43,7 +64,13 @@ func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...strin
// provides a ReadCloser. It is up to the caller to handle closing the file explicitly.
defer tempFile.Close()

if err := safeCopy(tempFile, file.ReadCloser); err != nil {
packedFile, err := file.Open()
if err != nil {
return fmt.Errorf("unable to read file=%q from tar=%q: %w", file.NameInArchive, archivePath, err)
}
defer internal.CloseAndLogError(packedFile, archivePath)

if err := safeCopy(tempFile, packedFile); err != nil {
return fmt.Errorf("unable to copy source=%q for tar=%q: %w", file.Name(), archivePath, err)
}

Expand All @@ -52,7 +79,7 @@ func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...strin
return nil
}

return results, archiver.Walk(archivePath, visitor)
return results, TraverseFilesInTar(ctx, archivePath, visitor)
}

func matchesAnyGlob(name string, globs ...string) bool {
Expand Down
17 changes: 11 additions & 6 deletions internal/file/zip_file_manifest.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
package file

import (
"context"
"os"
"sort"
"strings"

"github.com/mholt/archives"
"github.com/scylladb/go-set/strset"

"github.com/anchore/syft/internal/log"
Expand All @@ -14,22 +16,25 @@ import (
type ZipFileManifest map[string]os.FileInfo

// NewZipFileManifest creates and returns a new ZipFileManifest populated with path and metadata from the given zip archive path.
func NewZipFileManifest(archivePath string) (ZipFileManifest, error) {
zipReader, err := OpenZip(archivePath)
func NewZipFileManifest(ctx context.Context, archivePath string) (ZipFileManifest, error) {
zipReader, err := os.Open(archivePath)
manifest := make(ZipFileManifest)
if err != nil {
log.Debugf("unable to open zip archive (%s): %v", archivePath, err)
return manifest, err
}
defer func() {
err = zipReader.Close()
if err != nil {
if err = zipReader.Close(); err != nil {
log.Debugf("unable to close zip archive (%s): %+v", archivePath, err)
}
}()

for _, file := range zipReader.File {
manifest.Add(file.Name, file.FileInfo())
err = archives.Zip{}.Extract(ctx, zipReader, func(_ context.Context, file archives.FileInfo) error {
manifest.Add(file.NameInArchive, file.FileInfo)
return nil
})
if err != nil {
return manifest, err
}
return manifest, nil
}
Expand Down
Loading
Loading