From 1e7d2c74986bc1416fc2b74416164565cf6165e4 Mon Sep 17 00:00:00 2001 From: Christopher Phillips Date: Tue, 4 Oct 2022 14:17:04 -0400 Subject: [PATCH 1/5] feat: add identifiable field to source object Allow source.Source struct to set reproducible id for different schemes. This ID is calculated either as a digest from the given directory or file path. If the scheme is detected to be an ImageScheme then the ID is calculated as a ChainID: https://github.com/opencontainers/image-spec/blob/main/config.md#layer-chainid Signed-off-by: Christopher Phillips --- syft/source/source.go | 46 ++++++++++++++++++++++++++++++++++++++ syft/source/source_test.go | 38 +++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+) diff --git a/syft/source/source.go b/syft/source/source.go index c2ea7326f83..1572e296b0e 100644 --- a/syft/source/source.go +++ b/syft/source/source.go @@ -15,6 +15,7 @@ import ( "github.com/bmatcuk/doublestar/v4" "github.com/mholt/archiver/v3" + digest "github.com/opencontainers/go-digest" "github.com/spf13/afero" "github.com/anchore/stereoscope" @@ -25,6 +26,7 @@ import ( // Source is an object that captures the data source to be cataloged, configuration, and a specific resolver used // in cataloging (based on the data source and configuration) type Source struct { + id string Image *image.Image // the image object to be cataloged (image only) Metadata Metadata directoryResolver *directoryResolver @@ -304,6 +306,50 @@ func NewFromImage(img *image.Image, userImageStr string) (Source, error) { }, nil } +func (s *Source) ID() string { + return s.id +} + +func (s *Source) SetID() { + if s.Metadata.Scheme != ImageScheme { + // How do we generate ID for non-image sources? 
+ s.id = digest.FromString(s.Metadata.Path).String() + return + } + + // calcuate chain ID for image sources + // https://github.com/opencontainers/image-spec/blob/main/config.md#layer-chainid + s.id = calculateChainID(s.Image) + + if s.id == "" { + // TODO what happens here if image has no layers? + s.id = digest.FromString(s.Metadata.ImageMetadata.UserInput).String() + } + return +} + +func calculateChainID(img *image.Image) string { + if len(img.Layers) < 1 { + return "" + } + + // DiffID(L0) = digest of layer 0 + // https://github.com/anchore/stereoscope/blob/1b1b744a919964f38d14e1416fb3f25221b761ce/pkg/image/layer_metadata.go#L19-L32 + chainID := img.Layers[0].Metadata.Digest + id := chain(chainID, img.Layers[1:]) + + return id +} + +func chain(chainID string, layers []*image.Layer) string { + if len(layers) < 1 { + return chainID + } + + chainID = digest.FromString(layers[0].Metadata.Digest + " " + chainID).String() + return chain(chainID, layers[1:]) +} + func (s *Source) FileResolver(scope Scope) (FileResolver, error) { switch s.Metadata.Scheme { case DirectoryScheme, FileScheme: diff --git a/syft/source/source_test.go b/syft/source/source_test.go index e16e608da0c..4fca966f363 100644 --- a/syft/source/source_test.go +++ b/syft/source/source_test.go @@ -65,6 +65,44 @@ func TestNewFromImageFails(t *testing.T) { }) } +func TestSetID(t *testing.T) { + img := imagetest.GetFixtureImage(t, "oci-archive", "image-simple") + tests := []struct { + name string + input *Source + expected string + }{ + { + name: "source.SetID sets the ID for non image sources", + input: &Source{ + Metadata: Metadata{ + Scheme: FileScheme, + Path: "test-fixtures/image-simple/file-1.txt", + }, + }, + expected: "sha256:fbfb0730f4306b27c118715998ba58f1ad350f0451513c36c267dc4b9d3b688d", + }, + { + name: "source.SetID sets the ID for image sources", + input: &Source{ + Image: img, + Metadata: Metadata{ + Scheme: ImageScheme, + ImageMetadata: NewImageMetadata(img, "image-simple"), + }, + }, + 
expected: "sha256:e6d9f87981af1a1007a42be43b21ba6abe7c1608b1541e877c69052af5356669", + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + test.input.SetID() + assert.Equal(t, test.expected, test.input.ID()) + }) + } +} + func TestNewFromImage(t *testing.T) { layer := image.NewLayer(nil) img := image.Image{ From 520cc83869918ce5242c862aeddc186dbc311394 Mon Sep 17 00:00:00 2001 From: Christopher Phillips Date: Wed, 5 Oct 2022 10:11:28 -0400 Subject: [PATCH 2/5] chore: run go mod tidy Signed-off-by: Christopher Phillips --- go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 0b87b0ea819..f0774e29290 100644 --- a/go.mod +++ b/go.mod @@ -60,6 +60,7 @@ require ( github.com/google/go-containerregistry v0.11.0 github.com/in-toto/in-toto-golang v0.3.4-0.20220709202702-fa494aaa0add github.com/knqyf263/go-rpmdb v0.0.0-20220629110411-9a3bd2ebb923 + github.com/opencontainers/go-digest v1.0.0 github.com/sassoftware/go-rpmutils v0.2.0 github.com/sigstore/cosign v1.12.1 github.com/sigstore/rekor v0.12.1-0.20220915152154-4bb6f441c1b2 @@ -218,7 +219,6 @@ require ( github.com/mozillazg/docker-credential-acr-helper v0.3.0 // indirect github.com/nwaples/rardecode v1.1.0 // indirect github.com/oklog/ulid v1.3.1 // indirect - github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/image-spec v1.0.3-0.20220114050600-8b9d41f48198 // indirect github.com/opentracing/opentracing-go v1.2.0 // indirect github.com/pelletier/go-toml/v2 v2.0.5 // indirect From 2d0044472ab9c148ef150efc023459f4576376c1 Mon Sep 17 00:00:00 2001 From: Christopher Phillips Date: Wed, 5 Oct 2022 11:01:43 -0400 Subject: [PATCH 3/5] fix: update unit tests to pass for all scheme Signed-off-by: Christopher Phillips --- syft/source/source.go | 53 ++++++++++++++++++++++++++++---------- syft/source/source_test.go | 47 ++++++++++++++++++++++++++------- 2 files changed, 77 insertions(+), 23 deletions(-) diff --git 
a/syft/source/source.go b/syft/source/source.go index 1572e296b0e..43afa3a96fb 100644 --- a/syft/source/source.go +++ b/syft/source/source.go @@ -21,12 +21,13 @@ import ( "github.com/anchore/stereoscope" "github.com/anchore/stereoscope/pkg/image" "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/syft/artifact" ) // Source is an object that captures the data source to be cataloged, configuration, and a specific resolver used // in cataloging (based on the data source and configuration) type Source struct { - id string + id artifact.ID Image *image.Image // the image object to be cataloged (image only) Metadata Metadata directoryResolver *directoryResolver @@ -306,25 +307,49 @@ func NewFromImage(img *image.Image, userImageStr string) (Source, error) { }, nil } -func (s *Source) ID() string { +func (s *Source) ID() artifact.ID { return s.id } func (s *Source) SetID() { - if s.Metadata.Scheme != ImageScheme { - // How do we generate ID for non-image sources? - s.id = digest.FromString(s.Metadata.Path).String() - return - } - - // calcuate chain ID for image sources - // https://github.com/opencontainers/image-spec/blob/main/config.md#layer-chainid - s.id = calculateChainID(s.Image) + var d string + switch s.Metadata.Scheme { + case DirectoryScheme: + d = digest.FromString(s.Metadata.Path).String() + case FileScheme: + // attempt to use the digest of the contents of the file as the ID + file, err := os.Open(s.Metadata.Path) + if err != nil { + d = digest.FromString(s.Metadata.Path).String() + break + } + di, err := digest.FromReader(file) + if err != nil { + d = digest.FromString(s.Metadata.Path).String() + break + } + d = di.String() + case ImageScheme: + manifestDigest := digest.FromBytes(s.Image.Metadata.RawManifest).String() + if manifestDigest != "" { + d = manifestDigest + break + } - if s.id == "" { - // TODO what happens here if image has no layers? 
- s.id = digest.FromString(s.Metadata.ImageMetadata.UserInput).String() + // calcuate chain ID for image sources where manifestDigest is not available + // https://github.com/opencontainers/image-spec/blob/main/config.md#layer-chainid + d = calculateChainID(s.Image) + if d == "" { + // TODO what happens here if image has no layers? + // Is this case possible + d = digest.FromString(s.Metadata.ImageMetadata.UserInput).String() + } + default: // for UnknownScheme we hash the struct + id, _ := artifact.IDByHash(s) + d = string(id) } + + s.id = artifact.ID(strings.TrimPrefix(d, "sha256:")) return } diff --git a/syft/source/source_test.go b/syft/source/source_test.go index 4fca966f363..48c68fb847d 100644 --- a/syft/source/source_test.go +++ b/syft/source/source_test.go @@ -18,6 +18,7 @@ import ( "github.com/anchore/stereoscope/pkg/image" "github.com/anchore/stereoscope/pkg/imagetest" + "github.com/anchore/syft/syft/artifact" ) func TestParseInput(t *testing.T) { @@ -66,32 +67,60 @@ func TestNewFromImageFails(t *testing.T) { } func TestSetID(t *testing.T) { - img := imagetest.GetFixtureImage(t, "oci-archive", "image-simple") + layer := image.NewLayer(nil) + layer.Metadata = image.LayerMetadata{ + Digest: "sha256:6f4fb385d4e698647bf2a450749dfbb7bc2831ec9a730ef4046c78c08d468e89", + } + img := image.Image{ + Layers: []*image.Layer{layer}, + } + tests := []struct { name string input *Source - expected string + expected artifact.ID }{ { - name: "source.SetID sets the ID for non image sources", + name: "source.SetID sets the ID for FileScheme", input: &Source{ Metadata: Metadata{ Scheme: FileScheme, Path: "test-fixtures/image-simple/file-1.txt", }, }, - expected: "sha256:fbfb0730f4306b27c118715998ba58f1ad350f0451513c36c267dc4b9d3b688d", + expected: artifact.ID("55096713247489add592ce977637be868497132b36d1e294a3831925ec64319a"), + }, + { + name: "source.SetID sets the ID for ImageScheme", + input: &Source{ + Image: &img, + Metadata: Metadata{ + Scheme: ImageScheme, + }, + }, + 
expected: artifact.ID("e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"), + }, + { + name: "source.SetID sets the ID for DirectoryScheme", + input: &Source{ + Image: &img, + Metadata: Metadata{ + Scheme: DirectoryScheme, + Path: "test-fixtures/image-simple", + }, + }, + expected: artifact.ID("91db61e5e0ae097ef764796ce85e442a93f2a03e5313d4c7307e9b413f62e8c4"), }, { - name: "source.SetID sets the ID for image sources", + name: "source.SetID sets the ID for UnknownScheme", input: &Source{ - Image: img, + Image: &img, Metadata: Metadata{ - Scheme: ImageScheme, - ImageMetadata: NewImageMetadata(img, "image-simple"), + Scheme: UnknownScheme, + Path: "test-fixtures/image-simple", }, }, - expected: "sha256:e6d9f87981af1a1007a42be43b21ba6abe7c1608b1541e877c69052af5356669", + expected: artifact.ID("febd2d6148dc327d"), }, } From c407df11313fdca5471b36c81d4208aa25c067a1 Mon Sep 17 00:00:00 2001 From: Christopher Phillips Date: Wed, 5 Oct 2022 11:07:20 -0400 Subject: [PATCH 4/5] fix: update lint suggestions [optional footer(s)] Signed-off-by: Christopher Phillips --- syft/source/source.go | 1 - 1 file changed, 1 deletion(-) diff --git a/syft/source/source.go b/syft/source/source.go index 43afa3a96fb..060171d8ca9 100644 --- a/syft/source/source.go +++ b/syft/source/source.go @@ -350,7 +350,6 @@ func (s *Source) SetID() { } s.id = artifact.ID(strings.TrimPrefix(d, "sha256:")) - return } func calculateChainID(img *image.Image) string { From bafb4276a640bd5598ed1f12f823275969f1392d Mon Sep 17 00:00:00 2001 From: Christopher Phillips Date: Wed, 5 Oct 2022 12:14:24 -0400 Subject: [PATCH 5/5] fix: add call to s.SetID when invoking ID if blank Signed-off-by: Christopher Phillips --- syft/source/source.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/syft/source/source.go b/syft/source/source.go index 060171d8ca9..cab3e65405d 100644 --- a/syft/source/source.go +++ b/syft/source/source.go @@ -307,7 +307,10 @@ func NewFromImage(img *image.Image, 
userImageStr string) (Source, error) { }, nil } -func (s *Source) ID() artifact.ID { +func (s Source) ID() artifact.ID { + if s.id == "" { + s.SetID() + } return s.id }