Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
4328a18
chore: update license signatures to inject context
spiffcs May 9, 2025
165b63a
feat: refactor scanner; update to new License Constructors
spiffcs May 9, 2025
772741c
chore: refactor catalogers that use scanner to use new constructor
spiffcs May 9, 2025
75a823b
chore: refactor tests to take new ctx param
spiffcs May 9, 2025
d3f9987
chore: fix static analysis
spiffcs May 9, 2025
47e3412
chore: small cleanup before unit test fixes
spiffcs May 9, 2025
99ca2f2
fix: update unit tests to pass with new content expectations
spiffcs May 12, 2025
884927f
chore: add back deprecated licenses
spiffcs May 12, 2025
185adbe
chore: refactor other licenses to be created with packages in SPDX
spiffcs May 12, 2025
d569180
feat: add configuration for IncludeLicenseContent
spiffcs May 12, 2025
cac618d
test: update snapshots given contents no longer hashed
spiffcs May 12, 2025
d32e08c
tests: add new tests to cover changes in spdx format
spiffcs May 13, 2025
920cb0f
chore: fix static analysis
spiffcs May 13, 2025
a1a046b
Merge branch 'main' into 3088-new-license-ctx-scaner
spiffcs May 13, 2025
6e6ba9c
address review comments
wagoodman May 13, 2025
dde463a
fix tests
wagoodman May 13, 2025
1aac0a7
feat: add warning in postload to communicate deprecated option
spiffcs May 13, 2025
1d89ee2
chore: fix unit tests with new license constraints
spiffcs May 13, 2025
e5f45f0
tests: add tests to apply license content rules to the package tasks
spiffcs May 13, 2025
bbdc8c2
Merge remote-tracking branch 'origin/main' into 3088-new-license-ctx-…
wagoodman May 13, 2025
3ae907c
chore: merge with main
spiffcs May 13, 2025
d2a0510
rename configuration
wagoodman May 13, 2025
7875c3a
deprecate the license-coverage configuration
wagoodman May 13, 2025
85046b0
put package ID under test when enforcing license config
wagoodman May 13, 2025
03bdd8c
restore url-only license cases
wagoodman May 13, 2025
84fc885
preserve whitespace in license contents
wagoodman May 13, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cmd/syft/internal/options/catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,8 @@ func (cfg Catalog) ToFilesConfig() filecataloging.Config {

func (cfg Catalog) ToLicenseConfig() cataloging.LicenseConfig {
return cataloging.LicenseConfig{
IncludeUnkownLicenseContent: cfg.License.IncludeUnknownLicenseContent,
Coverage: cfg.License.LicenseCoverage,
IncludeContent: cfg.License.Content,
Coverage: cfg.License.Coverage,
}
}

Expand Down
65 changes: 58 additions & 7 deletions cmd/syft/internal/options/license.go
Original file line number Diff line number Diff line change
@@ -1,28 +1,79 @@
package options

import (
"fmt"

"github.com/anchore/clio"
"github.com/anchore/syft/syft/cataloging"
)

type licenseConfig struct {
IncludeUnknownLicenseContent bool `yaml:"include-unknown-license-content" json:"include-unknown-license-content" mapstructure:"include-unknown-license-content"`
LicenseCoverage float64 `yaml:"license-coverage" json:"license-coverage" mapstructure:"license-coverage"`
Content cataloging.LicenseContent `yaml:"content" json:"content" mapstructure:"content"`
// Deprecated: please use content instead
IncludeUnknownLicenseContent *bool `yaml:"-" json:"-" mapstructure:"include-unknown-license-content"`

Coverage float64 `yaml:"coverage" json:"coverage" mapstructure:"coverage"`
// Deprecated: please use coverage instead
LicenseCoverage *float64 `yaml:"license-coverage" json:"license-coverage" mapstructure:"license-coverage"`

AvailableLicenseContent []cataloging.LicenseContent `yaml:"-" json:"-" mapstructure:"-"`
}

// compile-time assertion that *licenseConfig satisfies clio.FieldDescriber
var _ interface {
clio.FieldDescriber
} = (*licenseConfig)(nil)

func (o *licenseConfig) DescribeFields(descriptions clio.FieldDescriptionSet) {
descriptions.Add(&o.IncludeUnknownLicenseContent, `include the content of a license in the SBOM when syft
cannot determine a valid SPDX ID for the given license`)
descriptions.Add(&o.LicenseCoverage, `adjust the percent as a fraction of the total text, in normalized words, that
descriptions.Add(&o.Content, fmt.Sprintf("include the content of licenses in the SBOM for a given syft scan; valid values are: %s", o.AvailableLicenseContent))
descriptions.Add(&o.IncludeUnknownLicenseContent, `deprecated: please use 'license-content' instead`)

descriptions.Add(&o.Coverage, `adjust the percent as a fraction of the total text, in normalized words, that
matches any valid license for the given inputs, expressed as a percentage across all of the licenses matched.`)
descriptions.Add(&o.LicenseCoverage, `deprecated: please use 'coverage' instead`)
}

// PostLoad reconciles the deprecated license options with their replacements.
//
// It returns an error when a deprecated option is set while its replacement has
// been changed away from the default reported by cataloging.DefaultLicenseConfig.
// Otherwise any deprecated value that was provided is copied onto the
// replacement field:
//   - 'include-unknown-license-content' -> 'content'
//   - 'license-coverage' -> 'coverage'
func (o *licenseConfig) PostLoad() error {
	defaults := cataloging.DefaultLicenseConfig()
	legacyContent, legacyCoverage := o.IncludeUnknownLicenseContent, o.LicenseCoverage

	// a deprecated option combined with a non-default replacement is ambiguous, so refuse to pick one
	if legacyContent != nil && o.Content != defaults.IncludeContent {
		return fmt.Errorf("both 'include-unknown-license-content' and 'content' are set, please use only 'content'")
	}
	if legacyCoverage != nil && o.Coverage != defaults.Coverage {
		return fmt.Errorf("both 'license-coverage' and 'coverage' are set, please use only 'coverage'")
	}

	// translate the deprecated boolean into the equivalent 'content' value
	if legacyContent != nil {
		if *legacyContent {
			o.Content = cataloging.LicenseContentIncludeUnknown
		} else {
			o.Content = cataloging.LicenseContentExcludeAll
		}
	}

	// carry the deprecated coverage value over to 'coverage'
	if legacyCoverage != nil {
		o.Coverage = *legacyCoverage
	}

	return nil
}

func defaultLicenseConfig() licenseConfig {
cfg := cataloging.DefaultLicenseConfig()
return licenseConfig{
IncludeUnknownLicenseContent: false,
LicenseCoverage: 75,
Content: cfg.IncludeContent,
Coverage: cfg.Coverage,
AvailableLicenseContent: []cataloging.LicenseContent{
cataloging.LicenseContentIncludeAll,
cataloging.LicenseContentIncludeUnknown,
cataloging.LicenseContentExcludeAll,
},
}
}
6 changes: 3 additions & 3 deletions internal/licenses/context_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import (
)

func TestSetContextLicenseScanner(t *testing.T) {
scanner := testScanner(true)
scanner := testScanner()
Comment thread
spiffcs marked this conversation as resolved.
ctx := context.Background()
ctx = SetContextLicenseScanner(ctx, scanner)

Expand All @@ -20,7 +20,7 @@ func TestSetContextLicenseScanner(t *testing.T) {
}

func TestIsContextLicenseScannerSet(t *testing.T) {
scanner := testScanner(true)
scanner := testScanner()
ctx := context.Background()
require.False(t, IsContextLicenseScannerSet(ctx))

Expand All @@ -30,7 +30,7 @@ func TestIsContextLicenseScannerSet(t *testing.T) {

func TestContextLicenseScanner(t *testing.T) {
t.Run("with scanner", func(t *testing.T) {
scanner := testScanner(true)
scanner := testScanner()
ctx := SetContextLicenseScanner(context.Background(), scanner)
s, err := ContextLicenseScanner(ctx)
if err != nil || s != scanner {
Expand Down
36 changes: 36 additions & 0 deletions internal/licenses/find_evidence.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package licenses

import (
"context"
"io"
)

// FindEvidence reads all of reader and runs the configured license scanner over
// the resulting bytes.
//
// Results:
//   - evidence: one entry per match reported by the scanner; nil when no scanner
//     is configured, when reading fails, or when the reported coverage is below
//     the scanner's coverage threshold.
//   - content: the bytes read from reader; nil when no scanner is configured or
//     when reading fails. It is returned even when coverage is below the
//     threshold so the caller can still work with the raw text.
//   - err: the error returned while reading from reader, if any.
//
// The context argument is currently unused.
func (s *scanner) FindEvidence(_ context.Context, reader io.Reader) (evidence []Evidence, content []byte, err error) {
	if s.scanner == nil {
		return nil, nil, nil
	}

	content, err = io.ReadAll(reader)
	if err != nil {
		return nil, nil, err
	}

	cov := s.scanner(content)
	if cov.Percent < s.coverageThreshold {
		// unknown or no licenses here: report no evidence, but hand the raw
		// content back so the caller can decide how to process it
		return nil, content, nil
	}

	// the number of matches is known up front, so allocate once; the slice is
	// intentionally non-nil even when there are no matches
	evidence = make([]Evidence, 0, len(cov.Match))
	for _, m := range cov.Match {
		evidence = append(evidence, Evidence{
			ID:    m.ID,
			Type:  m.Type,
			Start: m.Start,
			End:   m.End,
			IsURL: m.IsURL,
		})
	}
	return evidence, content, nil
}
81 changes: 81 additions & 0 deletions internal/licenses/find_evidence_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
package licenses

import (
"context"
"os"
"path/filepath"
"testing"

"github.com/google/licensecheck"
"github.com/stretchr/testify/require"
)

// TestDefaultScanner_FindEvidence runs the test scanner against fixture files
// and checks that the reported evidence carries exactly the expected license
// IDs (order-insensitive, duplicates included).
func TestDefaultScanner_FindEvidence(t *testing.T) {
	tests := []struct {
		name     string
		fixture  string
		wantIDs  []string // expected license IDs
		minMatch int      // minimum # of matches required
	}{
		{
			name:     "Single licenses are able to be recognized and returned Apache 2.0",
			fixture:  "test-fixtures/apache-license-2.0",
			wantIDs:  []string{"Apache-2.0"},
			minMatch: 1,
		},
		{
			name:    "Multiple Licenses are returned as evidence with duplicates at different offset",
			fixture: "test-fixtures/multi-license",
			wantIDs: []string{
				"MIT",
				"MIT",
				"NCSA",
				"Apache-2.0",
				"Zlib",
				"Unlicense",
				"BSD-2-Clause",
				"BSD-2-Clause",
				"BSD-3-Clause",
			},
			minMatch: 2,
		},
	}

	s := testScanner()
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			fh, err := os.Open(filepath.Clean(tt.fixture))
			require.NoError(t, err)
			defer fh.Close()

			evidence, content, err := s.FindEvidence(context.Background(), fh)
			require.NoError(t, err)
			require.NotEmpty(t, content)
			require.GreaterOrEqual(t, len(evidence), tt.minMatch, "expected at least %d matches", tt.minMatch)

			// collect only the IDs so they can be compared irrespective of order
			var gotIDs []string
			for i := range evidence {
				gotIDs = append(gotIDs, evidence[i].ID)
			}

			require.ElementsMatch(t, tt.wantIDs, gotIDs, "expected license IDs %v, but got %v", tt.wantIDs, gotIDs)
		})
	}
}

// testScanner returns a Scanner for tests that uses licensecheck's package-level
// Scan function together with DefaultCoverageThreshold.
func testScanner() Scanner {
	s := new(scanner)
	s.coverageThreshold = DefaultCoverageThreshold
	s.scanner = licensecheck.Scan
	return s
}

// mustOpen returns the entire contents of the file at the given fixture path.
// It panics with the underlying error if the file cannot be read, so it is
// only suitable for test setup.
func mustOpen(fixture string) []byte {
	data, readErr := os.ReadFile(fixture)
	if readErr == nil {
		return data
	}
	panic(readErr)
}
47 changes: 22 additions & 25 deletions internal/licenses/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,33 +8,37 @@ import (
"github.com/google/licensecheck"

"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
)

const (
DefaultCoverageThreshold = 75 // determined by experimentation
DefaultIncludeLicenseContent = false
UnknownLicensePrefix = unknownLicenseType + "_"
DefaultCoverageThreshold = 75 // determined by experimentation

unknownLicenseType = "UNKNOWN"
)

// Evidence describes a single license match found by the scanner within a piece
// of scanned text.
type Evidence struct {
ID string // License identifier. (See licenses/README.md.)
Type licensecheck.Type // The type of the license: BSD, MIT, etc.
Start int // Start offset of match in text; match is at text[Start:End].
End int // End offset of match in text.
IsURL bool // Whether match is a URL.
}

type Scanner interface {
IdentifyLicenseIDs(context.Context, io.Reader) ([]string, []byte, error)
FileSearch(context.Context, file.LocationReadCloser) ([]file.License, error)
PkgSearch(context.Context, file.LocationReadCloser) ([]pkg.License, error)
FindEvidence(context.Context, io.Reader) ([]Evidence, []byte, error)
Comment thread
wagoodman marked this conversation as resolved.
}

var _ Scanner = (*scanner)(nil)

type scanner struct {
coverageThreshold float64 // between 0 and 100
includeLicenseContent bool
scanner func([]byte) licensecheck.Coverage
coverageThreshold float64 // between 0 and 100
scanner func([]byte) licensecheck.Coverage
}

type ScannerConfig struct {
CoverageThreshold float64
IncludeLicenseContent bool
Scanner func([]byte) licensecheck.Coverage
CoverageThreshold float64
Scanner func([]byte) licensecheck.Coverage
}

type Option func(*scanner)
Expand All @@ -45,23 +49,17 @@ func WithCoverage(coverage float64) Option {
}
}

func WithIncludeLicenseContent(includeLicenseContent bool) Option {
return func(s *scanner) {
s.includeLicenseContent = includeLicenseContent
}
}

// NewDefaultScanner returns a scanner that uses a new instance of the default licensecheck package scanner.
func NewDefaultScanner(o ...Option) (Scanner, error) {
s, err := licensecheck.NewScanner(licensecheck.BuiltinLicenses())
if err != nil {
log.WithFields("error", err).Trace("unable to create default license scanner")
return nil, fmt.Errorf("unable to create default license scanner: %w", err)
}

newScanner := &scanner{
coverageThreshold: DefaultCoverageThreshold,
includeLicenseContent: DefaultIncludeLicenseContent,
scanner: s.Scan,
coverageThreshold: DefaultCoverageThreshold,
scanner: s.Scan,
}

for _, opt := range o {
Expand All @@ -78,8 +76,7 @@ func NewScanner(c *ScannerConfig) (Scanner, error) {
}

return &scanner{
coverageThreshold: c.CoverageThreshold,
includeLicenseContent: c.IncludeLicenseContent,
scanner: c.Scanner,
coverageThreshold: c.CoverageThreshold,
scanner: c.Scanner,
}, nil
}
Loading
Loading