From 14119e650b279e7a53fffc57dff4cbc69088d2c0 Mon Sep 17 00:00:00 2001 From: Christopher Phillips <32073428+spiffcs@users.noreply.github.com> Date: Mon, 5 May 2025 12:36:47 -0400 Subject: [PATCH 1/6] feat: add full text configuration to license struct Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com> --- internal/licenses/scanner.go | 1 + syft/cataloging/license.go | 2 ++ 2 files changed, 3 insertions(+) diff --git a/internal/licenses/scanner.go b/internal/licenses/scanner.go index 7728ba26b41..3adc4437e0e 100644 --- a/internal/licenses/scanner.go +++ b/internal/licenses/scanner.go @@ -15,6 +15,7 @@ import ( const ( DefaultCoverageThreshold = 75 // determined by experimentation DefaultIncludeLicenseContent = false + DefaultIncludeFullText = false ) type Scanner interface { diff --git a/syft/cataloging/license.go b/syft/cataloging/license.go index 24d8686f5eb..307a06ed1a6 100644 --- a/syft/cataloging/license.go +++ b/syft/cataloging/license.go @@ -3,12 +3,14 @@ package cataloging import "github.com/anchore/syft/internal/licenses" type LicenseConfig struct { + IncludeFullText bool `json:"include-full-text" yaml:"include-full-text" mapstructure:"include-full-text"` IncludeUnkownLicenseContent bool `json:"include-unknown-license-content" yaml:"include-unknown-license-content" mapstructure:"include-unknown-license-content"` Coverage float64 `json:"coverage" yaml:"coverage" mapstructure:"coverage"` } func DefaultLicenseConfig() LicenseConfig { return LicenseConfig{ + IncludeFullText: licenses.DefaultIncludeFullText, IncludeUnkownLicenseContent: licenses.DefaultIncludeLicenseContent, Coverage: licenses.DefaultCoverageThreshold, } From 1a5c4510d6920dd5877282ddf5c459dc9964ec4c Mon Sep 17 00:00:00 2001 From: Christopher Phillips <32073428+spiffcs@users.noreply.github.com> Date: Mon, 5 May 2025 12:42:09 -0400 Subject: [PATCH 2/6] feat: add full text flag to syft options Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com> --- cmd/syft/internal/options/catalog.go | 1 + cmd/syft/internal/options/license.go | 3 +++ 2 files changed, 4 insertions(+) diff --git a/cmd/syft/internal/options/catalog.go b/cmd/syft/internal/options/catalog.go index c27d5019e27..7d8666b97b0 100644 --- a/cmd/syft/internal/options/catalog.go +++ b/cmd/syft/internal/options/catalog.go @@ -158,6 +158,7 @@ func (cfg Catalog) ToFilesConfig() filecataloging.Config { func (cfg Catalog) ToLicenseConfig() cataloging.LicenseConfig { return cataloging.LicenseConfig{ + IncludeFullText: cfg.License.IncludeFullText, IncludeUnkownLicenseContent: cfg.License.IncludeUnknownLicenseContent, Coverage: cfg.License.LicenseCoverage, } diff --git a/cmd/syft/internal/options/license.go b/cmd/syft/internal/options/license.go index 1cce6c29823..958dafbca07 100644 --- a/cmd/syft/internal/options/license.go +++ b/cmd/syft/internal/options/license.go @@ -5,6 +5,7 @@ import ( ) type licenseConfig struct { + IncludeFullText bool `yaml:"include-full-text" json:"include-full-text" mapstructure:"include-full-text"` IncludeUnknownLicenseContent bool `yaml:"include-unknown-license-content" json:"include-unknown-license-content" mapstructure:"include-unknown-license-content"` LicenseCoverage float64 `yaml:"license-coverage" json:"license-coverage" mapstructure:"license-coverage"` } @@ -14,6 +15,7 @@ var _ interface { } = (*licenseConfig)(nil) func (o *licenseConfig) DescribeFields(descriptions clio.FieldDescriptionSet) { + descriptions.Add(&o.IncludeFullText, `include the content of a license in the SBOM for all cases where a cataloger has license content access`) descriptions.Add(&o.IncludeUnknownLicenseContent, `include the content of a license in the SBOM when syft cannot determine a valid SPDX ID for the given license`) descriptions.Add(&o.LicenseCoverage, `adjust the percent as a fraction of the total text, in normalized words, that @@ -22,6 +24,7 @@ matches any valid license for the given inputs, expressed as a percentage across func defaultLicenseConfig() licenseConfig { return licenseConfig{ + IncludeFullText: false, IncludeUnknownLicenseContent: false, LicenseCoverage: 75, } From d16da2c0f8c8a10fab742823a15c8580044823dd Mon Sep 17 00:00:00 2001 From: Christopher Phillips <32073428+spiffcs@users.noreply.github.com> Date: Mon, 5 May 2025 15:51:06 -0400 Subject: [PATCH 3/6] feat: add fullText option for catalog licenses Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com> --- cmd/syft/internal/options/catalog.go | 6 +-- internal/licenses/context_test.go | 6 +-- internal/licenses/scanner.go | 44 +++++++++------ internal/licenses/scanner_test.go | 11 ++-- internal/licenses/search.go | 81 +++++++++++++++------------- internal/licenses/search_test.go | 57 +++++++++++++++----- syft/cataloging/license.go | 12 ++--- syft/create_sbom.go | 4 +- syft/pkg/license.go | 17 ++++++ 9 files changed, 151 insertions(+), 87 deletions(-) diff --git a/cmd/syft/internal/options/catalog.go b/cmd/syft/internal/options/catalog.go index 7d8666b97b0..533cdd60533 100644 --- a/cmd/syft/internal/options/catalog.go +++ b/cmd/syft/internal/options/catalog.go @@ -158,9 +158,9 @@ func (cfg Catalog) ToFilesConfig() filecataloging.Config { func (cfg Catalog) ToLicenseConfig() cataloging.LicenseConfig { return cataloging.LicenseConfig{ - IncludeFullText: cfg.License.IncludeFullText, - IncludeUnkownLicenseContent: cfg.License.IncludeUnknownLicenseContent, - Coverage: cfg.License.LicenseCoverage, + IncludeFullText: cfg.License.IncludeFullText, + IncludeUnknownLicenseContent: cfg.License.IncludeUnknownLicenseContent, + Coverage: cfg.License.LicenseCoverage, } } diff --git a/internal/licenses/context_test.go b/internal/licenses/context_test.go index 412222e365a..240b38e5f4a 100644 --- a/internal/licenses/context_test.go +++ b/internal/licenses/context_test.go @@ -8,7 +8,7 @@ import ( ) func TestSetContextLicenseScanner(t *testing.T) { - scanner := testScanner(true) + scanner := testScanner(true, false) ctx := context.Background() ctx = SetContextLicenseScanner(ctx, scanner) @@ -20,7 +20,7 @@ func TestSetContextLicenseScanner(t *testing.T) { } func TestIsContextLicenseScannerSet(t *testing.T) { - scanner := testScanner(true) + scanner := testScanner(true, false) ctx := context.Background() require.False(t, IsContextLicenseScannerSet(ctx)) @@ -30,7 +30,7 @@ func TestIsContextLicenseScannerSet(t *testing.T) { func TestContextLicenseScanner(t *testing.T) { t.Run("with scanner", func(t *testing.T) { - scanner := testScanner(true) + scanner := testScanner(true, false) ctx := SetContextLicenseScanner(context.Background(), scanner) s, err := ContextLicenseScanner(ctx) if err != nil || s != scanner { diff --git a/internal/licenses/scanner.go b/internal/licenses/scanner.go index 3adc4437e0e..867a9864e35 100644 --- a/internal/licenses/scanner.go +++ b/internal/licenses/scanner.go @@ -13,9 +13,9 @@ import ( ) const ( - DefaultCoverageThreshold = 75 // determined by experimentation - DefaultIncludeLicenseContent = false - DefaultIncludeFullText = false + DefaultCoverageThreshold = 75 // determined by experimentation + DefaultIncludeUnknownLicenseContent = false + DefaultIncludeFullText = false ) type Scanner interface { @@ -27,15 +27,17 @@ type Scanner interface { var _ Scanner = (*scanner)(nil) type scanner struct { - coverageThreshold float64 // between 0 and 100 - includeLicenseContent bool - scanner func([]byte) licensecheck.Coverage + coverageThreshold float64 // between 0 and 100 + includeUnknownLicenseContent bool + includeFullText bool + scanner func([]byte) licensecheck.Coverage } type ScannerConfig struct { - CoverageThreshold float64 - IncludeLicenseContent bool - Scanner func([]byte) licensecheck.Coverage + CoverageThreshold float64 + IncludeUnknownLicenseContent bool + IncludeFullText bool + Scanner func([]byte) licensecheck.Coverage } type Option func(*scanner) @@ -46,9 +48,15 @@ func WithCoverage(coverage float64) Option { } } -func WithIncludeLicenseContent(includeLicenseContent bool) Option { +func WithIncludeUnknownLicenseContent(includeUnknownLicenseContent bool) Option { return func(s *scanner) { - s.includeLicenseContent = includeLicenseContent + s.includeUnknownLicenseContent = includeUnknownLicenseContent + } +} + +func WithIncludeFullText(includeFullText bool) Option { + return func(s *scanner) { + s.includeFullText = includeFullText } } @@ -60,9 +68,10 @@ func NewDefaultScanner(o ...Option) (Scanner, error) { return nil, fmt.Errorf("unable to create default license scanner: %w", err) } newScanner := &scanner{ - coverageThreshold: DefaultCoverageThreshold, - includeLicenseContent: DefaultIncludeLicenseContent, - scanner: s.Scan, + coverageThreshold: DefaultCoverageThreshold, + includeUnknownLicenseContent: DefaultIncludeUnknownLicenseContent, + includeFullText: DefaultIncludeFullText, + scanner: s.Scan, } for _, opt := range o { @@ -79,8 +88,9 @@ func NewScanner(c *ScannerConfig) (Scanner, error) { } return &scanner{ - coverageThreshold: c.CoverageThreshold, - includeLicenseContent: c.IncludeLicenseContent, - scanner: c.Scanner, + coverageThreshold: c.CoverageThreshold, + includeFullText: c.IncludeFullText, + includeUnknownLicenseContent: c.IncludeUnknownLicenseContent, + scanner: c.Scanner, }, nil } diff --git a/internal/licenses/scanner_test.go b/internal/licenses/scanner_test.go index 090a65464d5..522043e70c2 100644 --- a/internal/licenses/scanner_test.go +++ b/internal/licenses/scanner_test.go @@ -45,7 +45,7 @@ func TestIdentifyLicenseIDs(t *testing.T) { t.Run(test.name, func(t *testing.T) { content, err := os.ReadFile(test.in) require.NoError(t, err) - ids, content, err := testScanner(false).IdentifyLicenseIDs(context.TODO(), bytes.NewReader(content)) + ids, content, err := testScanner(false, false).IdentifyLicenseIDs(context.TODO(), bytes.NewReader(content)) if test.expected.yieldError { require.Error(t, err) } else { @@ -66,11 +66,12 @@ func TestIdentifyLicenseIDs(t *testing.T) { } } -func testScanner(includeLicenseContent bool) Scanner { +func testScanner(includeUnknownLicenseContent, includeFullText bool) Scanner { return &scanner{ - coverageThreshold: DefaultCoverageThreshold, - includeLicenseContent: includeLicenseContent, - scanner: licensecheck.Scan, + coverageThreshold: DefaultCoverageThreshold, + includeUnknownLicenseContent: includeUnknownLicenseContent, + includeFullText: includeFullText, + scanner: licensecheck.Scan, } } diff --git a/internal/licenses/search.go b/internal/licenses/search.go index 1ade9b6339a..ab04a0c95c7 100644 --- a/internal/licenses/search.go +++ b/internal/licenses/search.go @@ -39,10 +39,16 @@ func (s *scanner) IdentifyLicenseIDs(_ context.Context, reader io.Reader) ([]str return nil, content, nil } + // we found some kind of license match var ids []string for _, m := range cov.Match { ids = append(ids, m.ID) } + + // sometimes users want the full license even if they got an SPDX ID from searching the content + if s.includeFullText { + return ids, content, nil + } return ids, nil, nil } @@ -55,31 +61,33 @@ func (s *scanner) PkgSearch(ctx context.Context, reader file.LocationReadCloser) return nil, err } - // IdentifyLicenseIDs can only return a list of ID or content - // These return values are mutually exclusive. - // If the scanner threshold for matching scores < 75% then we return the license full content + // harmonize line endings to unix compatible first: + // 1. \r\n => \n (Windows => UNIX) + // 2. \r => \n (Macintosh => UNIX) + licContent := strings.ReplaceAll(strings.ReplaceAll(string(content), "\r\n", "\n"), "\r", "\n") + + // known licenses found if len(ids) > 0 { for _, id := range ids { - lic := pkg.NewLicenseFromLocations(id, reader.Location) - lic.Type = license.Concluded - - licenses = append(licenses, lic) + if s.includeFullText { + licenses = append(licenses, pkg.NewLicenseFromFullText(id, licContent, reader.Location, license.Concluded)) + } else { + li := pkg.NewLicenseFromType(id, license.Concluded) + li.Locations.Add(reader.Location) + licenses = append(licenses, li) + } } - } else if len(content) > 0 { - // harmonize line endings to unix compatible first: - // 1. \r\n => \n (Windows => UNIX) - // 2. \r => \n (Macintosh => UNIX) - content = []byte(strings.ReplaceAll(strings.ReplaceAll(string(content), "\r\n", "\n"), "\r", "\n")) - - lic := pkg.NewLicenseFromLocations(unknownLicenseType, reader.Location) - lic.SPDXExpression = UnknownLicensePrefix + getCustomLicenseContentHash(content) - if s.includeLicenseContent { - lic.Contents = string(content) - } - lic.Type = license.Declared + return licenses, nil + } - licenses = append(licenses, lic) + // scanner could not find SPDX ID associated with content + lic := pkg.NewLicenseFromLocations(unknownLicenseType, reader.Location) + lic.SPDXExpression = UnknownLicensePrefix + getCustomLicenseContentHash(content) + lic.Type = license.Declared + if s.includeUnknownLicenseContent { + lic.FullText = licContent } + licenses = append(licenses, lic) return licenses, nil } @@ -93,31 +101,30 @@ func (s *scanner) FileSearch(ctx context.Context, reader file.LocationReadCloser return nil, err } - // IdentifyLicenseIDs can only return a list of ID or content - // These return values are mutually exclusive. - // If the scanner threshold for matching scores < 75% then we return the license full content + // harmonize line endings to unix compatible first: + // 1. \r\n => \n (Windows => UNIX) + // 2. \r => \n (Macintosh => UNIX) + licContent := strings.ReplaceAll(strings.ReplaceAll(string(content), "\r\n", "\n"), "\r", "\n") if len(ids) > 0 { for _, id := range ids { lic := file.NewLicense(id) lic.Type = license.Concluded - + if s.includeFullText { + lic.Contents = licContent + } licenses = append(licenses, lic) } - } else if len(content) > 0 { - // harmonize line endings to unix compatible first: - // 1. \r\n => \n (Windows => UNIX) - // 2. \r => \n (Macintosh => UNIX) - content = []byte(strings.ReplaceAll(strings.ReplaceAll(string(content), "\r\n", "\n"), "\r", "\n")) - - lic := file.NewLicense(unknownLicenseType) - lic.SPDXExpression = UnknownLicensePrefix + getCustomLicenseContentHash(content) - if s.includeLicenseContent { - lic.Contents = string(content) - } - lic.Type = license.Declared + return licenses, nil + } - licenses = append(licenses, lic) + lic := file.NewLicense(unknownLicenseType) + lic.SPDXExpression = UnknownLicensePrefix + getCustomLicenseContentHash(content) + if s.includeUnknownLicenseContent { + lic.Contents = licContent } + lic.Type = license.Declared + licenses = append(licenses, lic) + return licenses, nil } diff --git a/internal/licenses/search_test.go b/internal/licenses/search_test.go index 11138bf49b4..2634f2dab19 100644 --- a/internal/licenses/search_test.go +++ b/internal/licenses/search_test.go @@ -60,7 +60,7 @@ func TestSearchFileLicenses(t *testing.T) { ctx := context.TODO() content, err := os.ReadFile(test.in) require.NoError(t, err) - s := testScanner(false) + s := testScanner(false, false) result, err := s.FileSearch(ctx, file.NewLocationReadCloser(file.NewLocation("LICENSE"), io.NopCloser(bytes.NewReader(content)))) if test.expected.yieldError { require.Error(t, err) @@ -77,6 +77,11 @@ func TestSearchFileLicenses(t *testing.T) { } } +type scannerOptions struct { + includeUnknownLicenseContent bool + includeFullText bool +} + func TestSearchPkgLicenses(t *testing.T) { type expectation struct { wantErr require.ErrorAssertionFunc @@ -85,14 +90,15 @@ func TestSearchPkgLicenses(t *testing.T) { testLocation := file.NewLocation("LICENSE") tests := []struct { - name string - in string - includeUnkownLicenseContent bool - expected expectation + name string + in string + scannerConfig scannerOptions + expected expectation }{ { - name: "apache license 2.0", - in: "test-fixtures/apache-license-2.0", + name: "apache license 2.0 all text options off", + in: "test-fixtures/apache-license-2.0", + scannerConfig: scannerOptions{}, expected: expectation{ licenses: []pkg.License{ { @@ -108,8 +114,9 @@ func TestSearchPkgLicenses(t *testing.T) { }, }, { - name: "custom license no content by default", - in: "test-fixtures/nvidia-software-and-cuda-supplement", + name: "custom license no content by default", + in: "test-fixtures/nvidia-software-and-cuda-supplement", + scannerConfig: scannerOptions{}, expected: expectation{ licenses: []pkg.License{ { @@ -125,9 +132,11 @@ func TestSearchPkgLicenses(t *testing.T) { }, }, { - name: "custom license with content when scanner has content config", - in: "test-fixtures/nvidia-software-and-cuda-supplement", - includeUnkownLicenseContent: true, + name: "custom license with content when scanner has content config", + in: "test-fixtures/nvidia-software-and-cuda-supplement", + scannerConfig: scannerOptions{ + includeUnknownLicenseContent: true, + }, expected: expectation{ licenses: []pkg.License{ { @@ -136,7 +145,27 @@ func TestSearchPkgLicenses(t *testing.T) { Type: "declared", URLs: nil, Locations: file.NewLocationSet(testLocation), - Contents: string(mustOpen("test-fixtures/nvidia-software-and-cuda-supplement")), + FullText: string(mustOpen("test-fixtures/nvidia-software-and-cuda-supplement")), + }, + }, + wantErr: nil, + }, + }, + { + name: "apache license 2.0 with full text when scanner has content config", + in: "test-fixtures/apache-license-2.0", + scannerConfig: scannerOptions{ + includeFullText: true, + }, + expected: expectation{ + licenses: []pkg.License{ + { + Value: "Apache-2.0", + SPDXExpression: "Apache-2.0", + Type: "concluded", + URLs: nil, + Locations: file.NewLocationSet(testLocation), + FullText: string(mustOpen("test-fixtures/apache-license-2.0")), }, }, wantErr: nil, @@ -149,7 +178,7 @@ func TestSearchPkgLicenses(t *testing.T) { ctx := context.TODO() content, err := os.ReadFile(test.in) require.NoError(t, err) - s := testScanner(test.includeUnkownLicenseContent) + s := testScanner(test.scannerConfig.includeUnknownLicenseContent, test.scannerConfig.includeFullText) result, err := s.PkgSearch(ctx, file.NewLocationReadCloser(file.NewLocation("LICENSE"), io.NopCloser(bytes.NewReader(content)))) if test.expected.wantErr != nil { test.expected.wantErr(t, err) diff --git a/syft/cataloging/license.go b/syft/cataloging/license.go index 307a06ed1a6..e732a71e5f2 100644 --- a/syft/cataloging/license.go +++ b/syft/cataloging/license.go @@ -3,15 +3,15 @@ package cataloging import "github.com/anchore/syft/internal/licenses" type LicenseConfig struct { - IncludeFullText bool `json:"include-full-text" yaml:"include-full-text" mapstructure:"include-full-text"` - IncludeUnkownLicenseContent bool `json:"include-unknown-license-content" yaml:"include-unknown-license-content" mapstructure:"include-unknown-license-content"` - Coverage float64 `json:"coverage" yaml:"coverage" mapstructure:"coverage"` + IncludeFullText bool `json:"include-full-text" yaml:"include-full-text" mapstructure:"include-full-text"` + IncludeUnknownLicenseContent bool `json:"include-unknown-license-content" yaml:"include-unknown-license-content" mapstructure:"include-unknown-license-content"` + Coverage float64 `json:"coverage" yaml:"coverage" mapstructure:"coverage"` } func DefaultLicenseConfig() LicenseConfig { return LicenseConfig{ - IncludeFullText: licenses.DefaultIncludeFullText, - IncludeUnkownLicenseContent: licenses.DefaultIncludeLicenseContent, - Coverage: licenses.DefaultCoverageThreshold, + IncludeFullText: licenses.DefaultIncludeFullText, + IncludeUnknownLicenseContent: licenses.DefaultIncludeUnknownLicenseContent, + Coverage: licenses.DefaultCoverageThreshold, } } diff --git a/syft/create_sbom.go b/syft/create_sbom.go index 68605d56491..3037106dc0a 100644 --- a/syft/create_sbom.go +++ b/syft/create_sbom.go @@ -107,9 +107,9 @@ func setupContext(ctx context.Context, cfg *CreateSBOMConfig) (context.Context, // SetContextLicenseScanner creates and sets a license scanner // on the provided context using the provided license config. func SetContextLicenseScanner(ctx context.Context, cfg cataloging.LicenseConfig) (context.Context, error) { - // inject a single license scanner and content config for all package cataloging tasks into context licenseScanner, err := licenses.NewDefaultScanner( - licenses.WithIncludeLicenseContent(cfg.IncludeUnkownLicenseContent), + licenses.WithIncludeFullText(cfg.IncludeFullText), + licenses.WithIncludeUnknownLicenseContent(cfg.IncludeUnknownLicenseContent), licenses.WithCoverage(cfg.Coverage), ) if err != nil { diff --git a/syft/pkg/license.go b/syft/pkg/license.go index 43fdd85cbb4..544fdb54ee8 100644 --- a/syft/pkg/license.go +++ b/syft/pkg/license.go @@ -69,10 +69,27 @@ func (l Licenses) Swap(i, j int) { l[i], l[j] = l[j], l[i] } +// NewLicense returns a license for the provided value with a declared type func NewLicense(value string) License { return NewLicenseFromType(value, license.Declared) } +func NewLicenseFromFullText(id string, fullText string, location file.Location, t license.Type) License { + spdxExpression, err := license.ParseExpression(id) + if err != nil { + log.WithFields("error", err, "expression", id).Trace("unable to parse license expression") + } + + l := License{ + SPDXExpression: spdxExpression, + Value: id, + FullText: fullText, + Type: t, + } + l.Locations.Add(location) + return l +} + func NewLicenseFromType(value string, t license.Type) License { var ( spdxExpression string From bbbf1fab487284e84302476182e9304d1a2f7b90 Mon Sep 17 00:00:00 2001 From: Christopher Phillips <32073428+spiffcs@users.noreply.github.com> Date: Mon, 5 May 2025 17:41:56 -0400 Subject: [PATCH 4/6] wip: wip reducing license content Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com> --- internal/licenses/scanner.go | 11 +- internal/licenses/scanner_test.go | 13 + internal/licenses/search.go | 37 +- internal/licenses/test-fixtures/multi-license | 446 ++++++++++++++++++ 4 files changed, 487 insertions(+), 20 deletions(-) create mode 100644 internal/licenses/test-fixtures/multi-license diff --git a/internal/licenses/scanner.go b/internal/licenses/scanner.go index 867a9864e35..272219725b0 100644 --- a/internal/licenses/scanner.go +++ b/internal/licenses/scanner.go @@ -18,8 +18,17 @@ const ( DefaultIncludeFullText = false ) +type ID struct { + LicenseID string + Offset Offset +} + +type Offset struct { + Start, End int +} + type Scanner interface { - IdentifyLicenseIDs(context.Context, io.Reader) ([]string, []byte, error) + IdentifyLicenseIDs(context.Context, io.Reader) ([]ID, []byte, error) FileSearch(context.Context, file.LocationReadCloser) ([]file.License, error) PkgSearch(context.Context, file.LocationReadCloser) ([]pkg.License, error) } diff --git a/internal/licenses/scanner_test.go b/internal/licenses/scanner_test.go index 522043e70c2..f2ce927035e 100644 --- a/internal/licenses/scanner_test.go +++ b/internal/licenses/scanner_test.go @@ -39,6 +39,19 @@ func TestIdentifyLicenseIDs(t *testing.T) { content: mustOpen("test-fixtures/nvidia-software-and-cuda-supplement"), }, }, + { + name: "Identify mutliple license IDs", + in: `test-fixtures/multi-license`, + expected: expectation{ + yieldError: false, + ids: []string{ + "Apache-2.0", + "BSD-2-Clause", + "BSD-3-Clause", + }, + content: mustOpen("test-fixtures/multi-license"), + }, + }, } for _, test := range tests { diff --git a/internal/licenses/search.go b/internal/licenses/search.go index ab04a0c95c7..424023cfe88 100644 --- a/internal/licenses/search.go +++ b/internal/licenses/search.go @@ -22,7 +22,7 @@ func getCustomLicenseContentHash(contents []byte) string { return fmt.Sprintf("%x", hash[:]) } -func (s *scanner) IdentifyLicenseIDs(_ context.Context, reader io.Reader) ([]string, []byte, error) { +func (s *scanner) IdentifyLicenseIDs(_ context.Context, reader io.Reader) ([]ID, []byte, error) { if s.scanner == nil { return nil, nil, nil } @@ -39,10 +39,9 @@ func (s *scanner) IdentifyLicenseIDs(_ context.Context, reader io.Reader) ([]str return nil, content, nil } - // we found some kind of license match - var ids []string + var ids []ID for _, m := range cov.Match { - ids = append(ids, m.ID) + ids = append(ids, ID{LicenseID: m.ID, Offset: Offset{Start: m.Start, End: m.End}}) } // sometimes users want the full license even if they got an SPDX ID from searching the content @@ -61,18 +60,14 @@ func (s *scanner) PkgSearch(ctx context.Context, reader file.LocationReadCloser) return nil, err } - // harmonize line endings to unix compatible first: - // 1. \r\n => \n (Windows => UNIX) - // 2. \r => \n (Macintosh => UNIX) - licContent := strings.ReplaceAll(strings.ReplaceAll(string(content), "\r\n", "\n"), "\r", "\n") - // known licenses found if len(ids) > 0 { for _, id := range ids { if s.includeFullText { - licenses = append(licenses, pkg.NewLicenseFromFullText(id, licContent, reader.Location, license.Concluded)) + extracted := string(content[id.Offset.Start:id.Offset.End]) + licenses = append(licenses, pkg.NewLicenseFromFullText(id.LicenseID, fixLineEndings(extracted), reader.Location, license.Concluded)) } else { - li := pkg.NewLicenseFromType(id, license.Concluded) + li := pkg.NewLicenseFromType(id.LicenseID, license.Concluded) li.Locations.Add(reader.Location) licenses = append(licenses, li) } @@ -85,7 +80,7 @@ func (s *scanner) PkgSearch(ctx context.Context, reader file.LocationReadCloser) lic.SPDXExpression = UnknownLicensePrefix + getCustomLicenseContentHash(content) lic.Type = license.Declared if s.includeUnknownLicenseContent { - lic.FullText = licContent + lic.FullText = fixLineEndings(string(content)) } licenses = append(licenses, lic) @@ -101,16 +96,13 @@ func (s *scanner) FileSearch(ctx context.Context, reader file.LocationReadCloser return nil, err } - // harmonize line endings to unix compatible first: - // 1. \r\n => \n (Windows => UNIX) - // 2. \r => \n (Macintosh => UNIX) - licContent := strings.ReplaceAll(strings.ReplaceAll(string(content), "\r\n", "\n"), "\r", "\n") if len(ids) > 0 { for _, id := range ids { - lic := file.NewLicense(id) + lic := file.NewLicense(id.LicenseID) lic.Type = license.Concluded if s.includeFullText { - lic.Contents = licContent + extracted := string(content[id.Offset.Start:id.Offset.End]) + lic.Contents = fixLineEndings(extracted) } licenses = append(licenses, lic) } @@ -120,7 +112,7 @@ func (s *scanner) FileSearch(ctx context.Context, reader file.LocationReadCloser lic := file.NewLicense(unknownLicenseType) lic.SPDXExpression = UnknownLicensePrefix + getCustomLicenseContentHash(content) if s.includeUnknownLicenseContent { - lic.Contents = licContent + lic.Contents = fixLineEndings(string(content)) } lic.Type = license.Declared @@ -128,3 +120,10 @@ func (s *scanner) FileSearch(ctx context.Context, reader file.LocationReadCloser return licenses, nil } + +func fixLineEndings(content string) string { + // harmonize line endings to unix compatible first: + // 1. \r\n => \n (Windows => UNIX) + // 2. \r => \n (Macintosh => UNIX) + return strings.ReplaceAll(strings.ReplaceAll(content, "\r\n", "\n"), "\r", "\n") +} diff --git a/internal/licenses/test-fixtures/multi-license b/internal/licenses/test-fixtures/multi-license new file mode 100644 index 00000000000..db98c077aed --- /dev/null +++ b/internal/licenses/test-fixtures/multi-license @@ -0,0 +1,446 @@ +EXPECTED:Apache-2.0,BSD-2-Clause,BSD-3-Clause,Copyright,MIT,NCSA,Unlicense,Zlib +Emscripten is available under 2 licenses, the MIT license and the +University of Illinois/NCSA Open Source License. + +Both are permissive open source licenses, with little if any +practical difference between them. + +The reason for offering both is that (1) the MIT license is +well-known, while (2) the University of Illinois/NCSA Open Source +License allows Emscripten's code to be integrated upstream into +LLVM, which uses that license, should the opportunity arise. + +Additionally, the binaryen project is available under the Apache License +Version 2.0. + +The full text of all three licenses follows. + +============================================================================== + +Copyright (c) 2010-2014 Emscripten authors, see AUTHORS file. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +============================================================================== + +Copyright (c) 2010-2014 Emscripten authors, see AUTHORS file. +All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal with the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + + Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimers. + + Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimers + in the documentation and/or other materials provided with the + distribution. + + Neither the names of Mozilla, + nor the names of its contributors may be used to endorse + or promote products derived from this Software without specific prior + written permission. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR +ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. + +============================================================================== + +This program uses portions of Node.js source code located in src/library_path.js, +in accordance with the terms of the MIT license. Node's license follows: + + """ + Copyright Joyent, Inc. and other Node contributors. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to + deal in the Software without restriction, including without limitation the + rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + sell copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + IN THE SOFTWARE. + """ + +============================================================================== + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +============================================================================== + +Simple DirectMedia Layer + Copyright (C) 1997-2014 Sam Lantinga + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + +============================================================================== + +Files: tools/filelock.py + +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to + +============================================================================== + +Files: tools/eliminator/node_modules/uglify-js/... tools/node_modules/terser/... + + Distributed under the BSD license: + + Copyright 2012 (c) Mihai Bazon + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above + copyright notice, this list of conditions and the following + disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials + provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER “AS IS” AND ANY + EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, + OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR + TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE. + +============================================================================== + +Files: system/include/webgpu/webgpu.h + +BSD 3-Clause License + +Copyright (c) 2019, "WebGPU native" developers +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +============================================================================== + +Copyright (c) 2005-2011 David Schultz +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +SUCH DAMAGE. \ No newline at end of file From 53310a37fa425b5c2f6695f1e154d67c6c88b152 Mon Sep 17 00:00:00 2001 From: Christopher Phillips <32073428+spiffcs@users.noreply.github.com> Date: Tue, 6 May 2025 11:10:40 -0400 Subject: [PATCH 5/6] chore: update scanner to return content and offset for license construction Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com> --- internal/licenses/scanner_test.go | 48 +++++++++++++++++-------------- internal/licenses/search.go | 6 ++-- internal/licenses/search_test.go | 4 +-- syft/pkg/license.go | 4 +-- 4 files changed, 34 insertions(+), 28 deletions(-) diff --git a/internal/licenses/scanner_test.go b/internal/licenses/scanner_test.go index f2ce927035e..37925506c79 100644 --- a/internal/licenses/scanner_test.go +++ b/internal/licenses/scanner_test.go @@ -13,7 +13,7 @@ import ( func TestIdentifyLicenseIDs(t *testing.T) { type expectation struct { yieldError bool - ids []string + ids []ID content []byte } tests := []struct { @@ -22,32 +22,38 @@ func TestIdentifyLicenseIDs(t *testing.T) { expected expectation }{ { - name: "apache license 2.0", + name: "apache license 2.0 with content offset", in: `test-fixtures/apache-license-2.0`, expected: expectation{ yieldError: false, - ids: []string{"Apache-2.0"}, + ids: []ID{{LicenseID: "Apache-2.0", Offset: Offset{Start: 0, End: 11324}}}, content: nil, }, }, { - name: "custom license includes content for IdentifyLicenseIDs", + name: "custom license returns content for IdentifyLicenseIDs", in: "test-fixtures/nvidia-software-and-cuda-supplement", expected: expectation{ yieldError: false, - ids: []string{}, + ids: []ID{}, content: mustOpen("test-fixtures/nvidia-software-and-cuda-supplement"), }, }, { - name: "Identify mutliple license IDs", + name: "Identify multiple license IDs. They should be deduplicated and contain content evidence.", in: `test-fixtures/multi-license`, expected: expectation{ yieldError: false, - ids: []string{ - "Apache-2.0", - "BSD-2-Clause", - "BSD-3-Clause", + ids: []ID{ + {LicenseID: "MIT", Offset: Offset{Start: 758, End: 1844}}, + {LicenseID: "NCSA", Offset: Offset{Start: 1925, End: 3463}}, + {LicenseID: "MIT", Offset: Offset{Start: 3708, End: 4932}}, + {LicenseID: "Apache-2.0", Offset: Offset{Start: 5021, End: 16378}}, + {LicenseID: "Zlib", Offset: Offset{Start: 16484, End: 17390}}, + {LicenseID: "Unlicense", Offset: Offset{Start: 17497, End: 18707}}, + {LicenseID: "BSD-2-Clause", Offset: Offset{Start: 18908, End: 20298}}, + {LicenseID: "BSD-3-Clause", Offset: Offset{Start: 20440, End: 21952}}, + {LicenseID: "BSD-2-Clause", Offset: Offset{Start: 22033, End: 23335}}, }, content: mustOpen("test-fixtures/multi-license"), }, @@ -58,22 +64,22 @@ func TestIdentifyLicenseIDs(t *testing.T) { t.Run(test.name, func(t *testing.T) { content, err := os.ReadFile(test.in) require.NoError(t, err) - ids, content, err := testScanner(false, false).IdentifyLicenseIDs(context.TODO(), bytes.NewReader(content)) + ids, content, err := testScanner(true, true).IdentifyLicenseIDs(context.TODO(), bytes.NewReader(content)) if test.expected.yieldError { require.Error(t, err) - } else { - require.NoError(t, err) + return + } + require.NoError(t, err) - require.Len(t, ids, len(test.expected.ids)) - require.Len(t, content, len(test.expected.content)) + require.Len(t, ids, len(test.expected.ids)) + require.Len(t, content, len(test.expected.content)) - if len(test.expected.ids) > 0 { - require.Equal(t, ids, test.expected.ids) - } + if len(test.expected.ids) > 0 { + require.Equal(t, ids, test.expected.ids) + } - if len(test.expected.content) > 0 { - require.Equal(t, content, test.expected.content) - } + if len(test.expected.content) > 0 { + require.Equal(t, content, test.expected.content) } }) } diff --git a/internal/licenses/search.go b/internal/licenses/search.go index 424023cfe88..e8bf241308b 100644 --- a/internal/licenses/search.go +++ b/internal/licenses/search.go @@ -65,7 +65,7 @@ func (s *scanner) PkgSearch(ctx context.Context, reader file.LocationReadCloser) for _, id := range ids { if s.includeFullText { extracted := string(content[id.Offset.Start:id.Offset.End]) - licenses = append(licenses, pkg.NewLicenseFromFullText(id.LicenseID, fixLineEndings(extracted), reader.Location, license.Concluded)) + licenses = append(licenses, pkg.NewLicenseFromContent(id.LicenseID, fixLineEndings(extracted), reader.Location, license.Concluded)) } else { li := pkg.NewLicenseFromType(id.LicenseID, license.Concluded) li.Locations.Add(reader.Location) @@ -75,12 +75,12 @@ func (s *scanner) PkgSearch(ctx context.Context, reader file.LocationReadCloser) return licenses, nil } - // scanner could not find SPDX ID associated with content + // scanner could not find any SPDX IDs associated with provided content lic := pkg.NewLicenseFromLocations(unknownLicenseType, reader.Location) lic.SPDXExpression = UnknownLicensePrefix + getCustomLicenseContentHash(content) lic.Type = license.Declared if s.includeUnknownLicenseContent { - lic.FullText = fixLineEndings(string(content)) + lic.Contents = fixLineEndings(string(content)) } licenses = append(licenses, lic) diff --git a/internal/licenses/search_test.go b/internal/licenses/search_test.go index 2634f2dab19..0e8318a1332 100644 --- a/internal/licenses/search_test.go +++ b/internal/licenses/search_test.go @@ -145,7 +145,7 @@ func TestSearchPkgLicenses(t *testing.T) { Type: "declared", URLs: nil, Locations: file.NewLocationSet(testLocation), - FullText: string(mustOpen("test-fixtures/nvidia-software-and-cuda-supplement")), + Contents: string(mustOpen("test-fixtures/nvidia-software-and-cuda-supplement")), }, }, wantErr: nil, @@ -165,7 +165,7 @@ func TestSearchPkgLicenses(t *testing.T) { Type: "concluded", URLs: nil, Locations: file.NewLocationSet(testLocation), - FullText: string(mustOpen("test-fixtures/apache-license-2.0")), + Contents: string(mustOpen("test-fixtures/apache-license-2.0")), }, }, wantErr: nil, diff --git a/syft/pkg/license.go b/syft/pkg/license.go index 69bfea8a3b3..b2018e72cc6 100644 --- a/syft/pkg/license.go +++ b/syft/pkg/license.go @@ -73,7 +73,7 @@ func NewLicense(value string) License { return NewLicenseFromType(value, license.Declared) } -func NewLicenseFromFullText(id string, fullText string, location file.Location, t license.Type) License { +func NewLicenseFromContent(id string, content string, location file.Location, t license.Type) License { spdxExpression, err := license.ParseExpression(id) if err != nil { log.WithFields("error", err, "expression", id).Trace("unable to parse license expression") @@ -82,7 +82,7 @@ func NewLicenseFromFullText(id string, fullText string, location file.Location, l := License{ SPDXExpression: spdxExpression, Value: id, - FullText: fullText, + Contents: content, Type: t, } l.Locations.Add(location) From 66dcf992ba172dda1cf37f5eb3c2210db73aceae Mon Sep 17 00:00:00 2001 From: Christopher Phillips <32073428+spiffcs@users.noreply.github.com> Date: Tue, 6 May 2025 12:02:59 -0400 Subject: [PATCH 6/6] feat: update scanner to allow for duplicate ID given different contents Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com> --- internal/licenses/scanner_test.go | 27 ++++--- internal/licenses/search.go | 11 +-- internal/licenses/search_test.go | 114 ++++++++++++++++++++++++++++++ 3 files changed, 136 insertions(+), 16 deletions(-) diff --git a/internal/licenses/scanner_test.go b/internal/licenses/scanner_test.go index 37925506c79..e1a61831e0f 100644 --- a/internal/licenses/scanner_test.go +++ b/internal/licenses/scanner_test.go @@ -17,22 +17,26 @@ func TestIdentifyLicenseIDs(t *testing.T) { content []byte } tests := []struct { - name string - in string - expected expectation + name string + in string + includeUnknownLicenseContent bool + includeFullText bool + expected expectation }{ { - name: "apache license 2.0 with content offset", - in: `test-fixtures/apache-license-2.0`, + name: "apache license 2.0 with content offset and correct content", + in: `test-fixtures/apache-license-2.0`, + includeFullText: true, expected: expectation{ yieldError: false, ids: []ID{{LicenseID: "Apache-2.0", Offset: Offset{Start: 0, End: 11324}}}, - content: nil, + content: mustOpen("test-fixtures/apache-license-2.0"), }, }, { - name: "custom license returns content for IdentifyLicenseIDs", - in: "test-fixtures/nvidia-software-and-cuda-supplement", + name: "custom license returns content for IdentifyLicenseIDs", + in: "test-fixtures/nvidia-software-and-cuda-supplement", + includeUnknownLicenseContent: true, expected: expectation{ yieldError: false, ids: []ID{}, @@ -40,8 +44,9 @@ func TestIdentifyLicenseIDs(t *testing.T) { }, }, { - name: "Identify multiple license IDs. They should be deduplicated and contain content evidence.", - in: `test-fixtures/multi-license`, + name: "Identify multiple license IDs. They should be deduplicated and contain content evidence.", + in: `test-fixtures/multi-license`, + includeFullText: true, expected: expectation{ yieldError: false, ids: []ID{ @@ -64,7 +69,7 @@ func TestIdentifyLicenseIDs(t *testing.T) { t.Run(test.name, func(t *testing.T) { content, err := os.ReadFile(test.in) require.NoError(t, err) - ids, content, err := testScanner(true, true).IdentifyLicenseIDs(context.TODO(), bytes.NewReader(content)) + ids, content, err := testScanner(test.includeUnknownLicenseContent, test.includeFullText).IdentifyLicenseIDs(context.TODO(), bytes.NewReader(content)) if test.expected.yieldError { require.Error(t, err) return diff --git a/internal/licenses/search.go b/internal/licenses/search.go index e8bf241308b..dacbb9cc08d 100644 --- a/internal/licenses/search.go +++ b/internal/licenses/search.go @@ -63,14 +63,15 @@ func (s *scanner) PkgSearch(ctx context.Context, reader file.LocationReadCloser) // known licenses found if len(ids) > 0 { for _, id := range ids { - if s.includeFullText { + // make sure we can always slice content when asked for fullText + if s.includeFullText && id.Offset.Start >= 0 && id.Offset.End <= len(content) && id.Offset.Start <= id.Offset.End { extracted := string(content[id.Offset.Start:id.Offset.End]) licenses = append(licenses, pkg.NewLicenseFromContent(id.LicenseID, fixLineEndings(extracted), reader.Location, license.Concluded)) - } else { - li := pkg.NewLicenseFromType(id.LicenseID, license.Concluded) - li.Locations.Add(reader.Location) - licenses = append(licenses, li) + continue } + li := pkg.NewLicenseFromType(id.LicenseID, license.Concluded) + li.Locations.Add(reader.Location) + licenses = append(licenses, li) } return licenses, nil } diff --git a/internal/licenses/search_test.go b/internal/licenses/search_test.go index 0e8318a1332..03ffa9d4ade 100644 --- a/internal/licenses/search_test.go +++ b/internal/licenses/search_test.go @@ -89,6 +89,7 @@ func TestSearchPkgLicenses(t *testing.T) { } testLocation := file.NewLocation("LICENSE") + multiLicense := "test-fixtures/multi-license" tests := []struct { name string in string @@ -171,6 +172,90 @@ func TestSearchPkgLicenses(t *testing.T) { wantErr: nil, }, }, + { + name: "multiple licenses are returned from a single text with their full text when scanner has full text configured. duplicates with different contents are allowed", + in: multiLicense, + scannerConfig: scannerOptions{ + includeFullText: true, + }, + expected: expectation{ + licenses: []pkg.License{ + { + SPDXExpression: "MIT", + Value: "MIT", + Type: "concluded", + Contents: mustReadOffsetContent(t, multiLicense, 758, 1844), + URLs: nil, + Locations: file.NewLocationSet(testLocation), + }, + { + SPDXExpression: "NCSA", + Value: "NCSA", + Type: "concluded", + Contents: mustReadOffsetContent(t, multiLicense, 1925, 3463), + URLs: nil, + Locations: file.NewLocationSet(testLocation), + }, + { + SPDXExpression: "MIT", + Value: "MIT", + Type: "concluded", + Contents: mustReadOffsetContent(t, multiLicense, 3708, 4932), + URLs: nil, + Locations: file.NewLocationSet(testLocation), + }, + { + SPDXExpression: "Apache-2.0", + Value: "Apache-2.0", + Type: "concluded", + Contents: mustReadOffsetContent(t, multiLicense, 5021, 16378), + URLs: nil, + Locations: file.NewLocationSet(testLocation), + }, + { + SPDXExpression: "Zlib", + Value: "Zlib", + Type: "concluded", + Contents: mustReadOffsetContent(t, multiLicense, 16484, 17390), + URLs: nil, + Locations: file.NewLocationSet(testLocation), + }, + { + SPDXExpression: "Unlicense", + Value: "Unlicense", + Type: "concluded", + Contents: mustReadOffsetContent(t, multiLicense, 17497, 18707), + URLs: nil, + Locations: file.NewLocationSet(testLocation), + }, + { + SPDXExpression: "BSD-2-Clause", + Value: "BSD-2-Clause", + Type: "concluded", + Contents: mustReadOffsetContent(t, multiLicense, 18908, 20298), + URLs: nil, + Locations: file.NewLocationSet(testLocation), + }, + { + SPDXExpression: "BSD-3-Clause", + Value: "BSD-3-Clause", + Type: "concluded", + Contents: mustReadOffsetContent(t, multiLicense, 20440, 21952), + URLs: nil, + Locations: file.NewLocationSet(testLocation), + }, + { + SPDXExpression: "BSD-2-Clause", + Value: "BSD-2-Clause", + Type: "concluded", + Contents: mustReadOffsetContent(t, multiLicense, 22033, 23335), + URLs: nil, + Locations: file.NewLocationSet(testLocation), + }, + }, + wantErr: nil, + }, + }, } for _, test := range tests { @@ -193,3 +278,32 @@ func TestSearchPkgLicenses(t *testing.T) { }) } } + +func mustReadOffsetContent(t *testing.T, path string, start, end int64) string { + t.Helper() + + if start < 0 || end < start { + t.Fatalf("invalid offsets: start=%d, end=%d", start, end) + } + + file, err := os.Open(path) + if err != nil { + t.Fatalf("failed to open file %q: %v", path, err) + } + defer file.Close() + + length := end - start + buffer := make([]byte, length) + + _, err = file.Seek(start, io.SeekStart) + if err != nil { + t.Fatalf("failed to seek to offset %d: %v", start, err) + } + + n, err := io.ReadFull(file, buffer) + if err != nil { + t.Fatalf("failed to read content: %v", err) + } + + return string(buffer[:n]) +}