diff --git a/extractor/filesystem/language/rust/cargoauditable/cargoauditable.go b/extractor/filesystem/language/rust/cargoauditable/cargoauditable.go new file mode 100644 index 000000000..266d41d36 --- /dev/null +++ b/extractor/filesystem/language/rust/cargoauditable/cargoauditable.go @@ -0,0 +1,179 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package cargoauditable extracts dependencies from cargo auditable inside rust binaries. +package cargoauditable + +import ( + "context" + "errors" + "fmt" + "io" + + "github.com/google/osv-scalibr/extractor" + "github.com/google/osv-scalibr/extractor/filesystem" + "github.com/google/osv-scalibr/log" + "github.com/google/osv-scalibr/plugin" + "github.com/google/osv-scalibr/purl" + "github.com/google/osv-scalibr/stats" + "github.com/microsoft/go-rustaudit" +) + +// defaultMaxFileSizeBytes is the maximum file size an extractor will unmarshal. +// If Extract gets a bigger file, it will return an error. +const defaultMaxFileSizeBytes = 0 + +// defaultExtractBuildDependencies is whether to extract build dependencies or only runtime ones. +const defaultExtractBuildDependencies = false + +// Config is the configuration for the Extractor. +type Config struct { + // Stats is a stats collector for reporting metrics. + Stats stats.Collector + // MaxFileSizeBytes is the maximum size of a file that can be extracted. + // If this limit is greater than zero and a file is encountered that is larger + // than this limit, the file is ignored by returning false for `FileRequired`. + MaxFileSizeBytes int64 + // ExtractBuildDependencies is whether to extract build dependencies or only runtime ones. + ExtractBuildDependencies bool +} + +// Extractor for extracting dependencies from cargo auditable inside rust binaries. +type Extractor struct { + stats stats.Collector + maxFileSizeBytes int64 + extractBuildDependencies bool +} + +// DefaultConfig returns a default configuration for the extractor. +func DefaultConfig() Config { + return Config{ + Stats: nil, + MaxFileSizeBytes: defaultMaxFileSizeBytes, + ExtractBuildDependencies: defaultExtractBuildDependencies, + } +} + +// New returns a Cargo Auditable extractor. +// +// For most use cases, initialize with: +// ``` +// e := New(DefaultConfig()) +// ``` +func New(cfg Config) *Extractor { + return &Extractor{ + stats: cfg.Stats, + maxFileSizeBytes: cfg.MaxFileSizeBytes, + extractBuildDependencies: cfg.ExtractBuildDependencies, + } +} + +// Name of the extractor. +func (e Extractor) Name() string { return "rust/cargoauditable" } + +// Version of the extractor. +func (e Extractor) Version() int { return 0 } + +// Requirements for enabling the extractor. +func (e Extractor) Requirements() *plugin.Capabilities { return &plugin.Capabilities{} } + +// FileRequired returns true if the specified file is marked executable. +func (e Extractor) FileRequired(api filesystem.FileAPI) bool { + path := api.Path() + + fileinfo, err := api.Stat() + if err != nil { + return false + } + + if !filesystem.IsInterestingExecutable(api) { + return false + } + + sizeLimitExceeded := e.maxFileSizeBytes > 0 && fileinfo.Size() > e.maxFileSizeBytes + result := stats.FileRequiredResultOK + if sizeLimitExceeded { + result = stats.FileRequiredResultSizeLimitExceeded + } + + if e.stats != nil { + e.stats.AfterFileRequired(e.Name(), &stats.FileRequiredStats{ + Path: path, + Result: result, + FileSizeBytes: fileinfo.Size(), + }) + } + return !sizeLimitExceeded +} + +// Extract extracts packages from cargo auditable inside rust binaries. +func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) ([]*extractor.Inventory, error) { + reader, ok := input.Reader.(io.ReaderAt) + if !ok { + return nil, fmt.Errorf("input.Reader is not a ReaderAt") + } + + dependencyInfo, err := rustaudit.GetDependencyInfo(reader) + e.reportFileExtracted(input, filesystem.ExtractorErrorToFileExtractedResult(err)) + // Most errors are just that the file is not a cargo auditable rust binary. + if err != nil { + if errors.Is(err, rustaudit.ErrUnknownFileFormat) || errors.Is(err, rustaudit.ErrNoRustDepInfo) { + return []*extractor.Inventory{}, nil + } + log.Debugf("error getting dependency information from binary (%s) for extraction: %v", input.Path, err) + return nil, fmt.Errorf("rustaudit.GetDependencyInfo(%q): %w", input.Path, err) + } + + inventory := []*extractor.Inventory{} + for _, dep := range dependencyInfo.Packages { + // Cargo auditable also tracks build-only dependencies which we may not want to report. + // Note: the main package is reported as a runtime dependency. + if dep.Kind == rustaudit.Runtime || e.extractBuildDependencies { + inventory = append(inventory, &extractor.Inventory{ + Name: dep.Name, + Version: dep.Version, + Locations: []string{input.Path}, + }) + } + } + return inventory, nil +} + +func (e Extractor) reportFileExtracted(input *filesystem.ScanInput, result stats.FileExtractedResult) { + if e.stats == nil { + return + } + e.stats.AfterFileExtracted(e.Name(), &stats.FileExtractedStats{ + Path: input.Path, + Result: result, + FileSizeBytes: input.Info.Size(), + }) +} + +// ToPURL converts an inventory created by this extractor into a PURL. +func (e Extractor) ToPURL(i *extractor.Inventory) *purl.PackageURL { + return &purl.PackageURL{ + Type: purl.TypeCargo, + Name: i.Name, + Version: i.Version, + } +} + +// Ecosystem returns the OSV ecosystem ('crates.io') of the software extracted by this extractor. +func (e Extractor) Ecosystem(_ *extractor.Inventory) string { + return "crates.io" +} + +// Ensure Extractor implements the filesystem.Extractor interface. +var _ filesystem.Extractor = Extractor{} diff --git a/extractor/filesystem/language/rust/cargoauditable/cargoauditable_test.go b/extractor/filesystem/language/rust/cargoauditable/cargoauditable_test.go new file mode 100644 index 000000000..c1a228896 --- /dev/null +++ b/extractor/filesystem/language/rust/cargoauditable/cargoauditable_test.go @@ -0,0 +1,263 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cargoauditable_test + +import ( + "context" + "io/fs" + "os" + "path/filepath" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/google/osv-scalibr/extractor" + "github.com/google/osv-scalibr/extractor/filesystem" + "github.com/google/osv-scalibr/extractor/filesystem/language/rust/cargoauditable" + "github.com/google/osv-scalibr/extractor/filesystem/simplefileapi" + scalibrfs "github.com/google/osv-scalibr/fs" + "github.com/google/osv-scalibr/purl" + "github.com/google/osv-scalibr/stats" + "github.com/google/osv-scalibr/testing/fakefs" + "github.com/google/osv-scalibr/testing/testcollector" +) + +func TestFileRequired(t *testing.T) { + tests := []struct { + name string + path string + mode fs.FileMode + fileSizeBytes int64 + maxFileSizeBytes int64 + wantRequired bool + wantResultMetric stats.FileRequiredResult + }{ + { + name: "executable required if size less than maxFileSizeBytes", + path: "some/path/a", + mode: 0766, + fileSizeBytes: 100, + maxFileSizeBytes: 1000, + wantRequired: true, + wantResultMetric: stats.FileRequiredResultOK, + }, + { + name: "executable required if size equal to maxFileSizeBytes", + path: "some/path/a", + mode: 0766, + fileSizeBytes: 1000, + maxFileSizeBytes: 1000, + wantRequired: true, + wantResultMetric: stats.FileRequiredResultOK, + }, + { + name: "executable not required if size greater than maxFileSizeBytes", + path: "some/path/a", + mode: 0766, + fileSizeBytes: 1000, + maxFileSizeBytes: 100, + wantRequired: false, + wantResultMetric: stats.FileRequiredResultSizeLimitExceeded, + }, + { + name: "executable required if maxFileSizeBytes explicitly set to 0", + path: "some/path/a", + mode: 0766, + fileSizeBytes: 1000, + maxFileSizeBytes: 0, + wantRequired: true, + wantResultMetric: stats.FileRequiredResultOK, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + collector := testcollector.New() + e := cargoauditable.New(cargoauditable.Config{ + Stats: collector, + MaxFileSizeBytes: tt.maxFileSizeBytes, + }) + + // Set a default file size if not specified. + fileSizeBytes := tt.fileSizeBytes + if fileSizeBytes == 0 { + fileSizeBytes = 1000 + } + + if got := e.FileRequired(simplefileapi.New(tt.path, fakefs.FakeFileInfo{ + FileName: filepath.Base(tt.path), + FileMode: tt.mode, + FileSize: fileSizeBytes, + })); got != tt.wantRequired { + t.Fatalf("FileRequired(%s): got %v, want %v", tt.path, got, tt.wantRequired) + } + + gotResultMetric := collector.FileRequiredResult(tt.path) + if gotResultMetric != tt.wantResultMetric { + t.Errorf("FileRequired(%s) recorded result metric %v, want result metric %v", tt.path, gotResultMetric, tt.wantResultMetric) + } + }) + } +} + +func TestToPURL(t *testing.T) { + cargoAuditableExtractor := cargoauditable.Extractor{} + inventory := &extractor.Inventory{ + Name: "name", + Version: "1.2.3", + Locations: []string{"location"}, + } + want := &purl.PackageURL{ + Type: purl.TypeCargo, + Name: "name", + Version: "1.2.3", + } + got := cargoAuditableExtractor.ToPURL(inventory) + if diff := cmp.Diff(want, got); diff != "" { + t.Errorf("ToPURL(%v) (-want +got):\n%s", inventory, diff) + } +} + +func TestExtract(t *testing.T) { + tests := []struct { + name string + path string + wantInventory []*extractor.Inventory + wantErr error + wantResultMetric stats.FileExtractedResult + }{ + { + name: "uses_serde_json", + path: "testdata/uses_serde_json/uses_serde_json", + wantInventory: []*extractor.Inventory{ + { + Name: "itoa", + Version: "1.0.14", + Locations: []string{"testdata/uses_serde_json/uses_serde_json"}, + }, + { + Name: "memchr", + Version: "2.7.4", + Locations: []string{"testdata/uses_serde_json/uses_serde_json"}, + }, + { + Name: "proc-macro2", + Version: "1.0.92", + Locations: []string{"testdata/uses_serde_json/uses_serde_json"}, + }, + { + Name: "quote", + Version: "1.0.38", + Locations: []string{"testdata/uses_serde_json/uses_serde_json"}, + }, + { + Name: "ryu", + Version: "1.0.18", + Locations: []string{"testdata/uses_serde_json/uses_serde_json"}, + }, + { + Name: "serde", + Version: "1.0.217", + Locations: []string{"testdata/uses_serde_json/uses_serde_json"}, + }, + { + Name: "serde_derive", + Version: "1.0.217", + Locations: []string{"testdata/uses_serde_json/uses_serde_json"}, + }, + { + Name: "serde_json", + Version: "1.0.135", + Locations: []string{"testdata/uses_serde_json/uses_serde_json"}, + }, + { + Name: "syn", + Version: "2.0.95", + Locations: []string{"testdata/uses_serde_json/uses_serde_json"}, + }, + { + Name: "unicode-ident", + Version: "1.0.14", + Locations: []string{"testdata/uses_serde_json/uses_serde_json"}, + }, + { + Name: "uses_json", + Version: "0.1.0", + Locations: []string{"testdata/uses_serde_json/uses_serde_json"}, + }, + }, + }, + { + name: "no_deps", + path: "testdata/no_deps/no_deps", + wantInventory: []*extractor.Inventory{ + { + Name: "no_deps", + Version: "0.1.0", + Locations: []string{"testdata/no_deps/no_deps"}, + }, + }, + }, + { + name: "not_binary", + path: "testdata/not_binary/not_binary", + wantInventory: []*extractor.Inventory{}, + wantResultMetric: stats.FileExtractedResultErrorUnknown, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + f, err := os.Open(tt.path) + if err != nil { + t.Fatalf("os.Open(%s) unexpected error: %v", tt.path, err) + } + defer f.Close() + + info, err := f.Stat() + if err != nil { + t.Fatalf("f.Stat() for %q unexpected error: %v", tt.path, err) + } + + collector := testcollector.New() + + input := &filesystem.ScanInput{FS: scalibrfs.DirFS("."), Path: tt.path, Info: info, Reader: f} + + e := cargoauditable.New(cargoauditable.Config{Stats: collector}) + got, err := e.Extract(context.Background(), input) + if err != tt.wantErr { + t.Fatalf("Extract(%s) got error: %v, want error: %v", tt.path, err, tt.wantErr) + } + sort := func(a, b *extractor.Inventory) bool { return a.Name < b.Name } + if diff := cmp.Diff(tt.wantInventory, got, cmpopts.SortSlices(sort)); diff != "" { + t.Fatalf("Extract(%s) (-want +got):\n%s", tt.path, diff) + } + + wantResultMetric := tt.wantResultMetric + if wantResultMetric == "" && tt.wantErr == nil { + wantResultMetric = stats.FileExtractedResultSuccess + } + gotResultMetric := collector.FileExtractedResult(tt.path) + if gotResultMetric != wantResultMetric { + t.Errorf("Extract(%s) recorded result metric %v, want result metric %v", tt.path, gotResultMetric, wantResultMetric) + } + + gotFileSizeMetric := collector.FileExtractedFileSize(tt.path) + if gotFileSizeMetric != info.Size() { + t.Errorf("Extract(%s) recorded file size %v, want file size %v", tt.path, gotFileSizeMetric, info.Size()) + } + }) + } +} diff --git a/extractor/filesystem/language/rust/cargoauditable/testdata/no_deps/main.rs b/extractor/filesystem/language/rust/cargoauditable/testdata/no_deps/main.rs new file mode 100644 index 000000000..30894431e --- /dev/null +++ b/extractor/filesystem/language/rust/cargoauditable/testdata/no_deps/main.rs @@ -0,0 +1,15 @@ +// Source Code used to create the no_deps binary. + +/* Cargo.toml + +[package] +name = "no_deps" +version = "0.1.0" +edition = "2021" + +[dependencies] +*/ + +fn main() { + println!("Hello, world!"); +} diff --git a/extractor/filesystem/language/rust/cargoauditable/testdata/no_deps/no_deps b/extractor/filesystem/language/rust/cargoauditable/testdata/no_deps/no_deps new file mode 100755 index 000000000..7153b3913 Binary files /dev/null and b/extractor/filesystem/language/rust/cargoauditable/testdata/no_deps/no_deps differ diff --git a/extractor/filesystem/language/rust/cargoauditable/testdata/not_binary/not_binary b/extractor/filesystem/language/rust/cargoauditable/testdata/not_binary/not_binary new file mode 100755 index 000000000..ec2c8dbbb --- /dev/null +++ b/extractor/filesystem/language/rust/cargoauditable/testdata/not_binary/not_binary @@ -0,0 +1,5 @@ +#!/bin/bash + +echo "Though you may well take a banana, and might even cut it in half\ + carefully, as not to also slice your finger or make the sides unequally...\ + Bash scripts are not Rust Binaries and cannot be audited by cargo auditable." diff --git a/extractor/filesystem/language/rust/cargoauditable/testdata/uses_serde_json/main.rs b/extractor/filesystem/language/rust/cargoauditable/testdata/uses_serde_json/main.rs new file mode 100644 index 000000000..46ca0f5ef --- /dev/null +++ b/extractor/filesystem/language/rust/cargoauditable/testdata/uses_serde_json/main.rs @@ -0,0 +1,41 @@ +// Source Code used to create the uses_json binary. + +/* Cargo.toml + +[package] +name = "uses_json" +version = "0.1.0" +edition = "2021" + +[dependencies] +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +*/ + +use serde::{Deserialize, Serialize}; +use serde_json::Result; + +#[derive(Serialize, Deserialize)] +struct Thingy { + name: String, + id: u32, +} + +fn deserialize(data: &str) -> Result { + Ok(serde_json::from_str(data)?) +} + +fn serialize(p: &Thingy) -> Result { + let j = serde_json::to_string(&p)?; + Ok(j) +} + +fn main() -> Result<()> { + let data = r#"{"name": "foo", "id": 314}"#; + let obj = deserialize(&data)?; + + println!("Thingy '{}' has id {}", obj.name, obj.id); + + println!("{}", serialize(&obj)?); + Ok(()) +} diff --git a/extractor/filesystem/language/rust/cargoauditable/testdata/uses_serde_json/uses_serde_json b/extractor/filesystem/language/rust/cargoauditable/testdata/uses_serde_json/uses_serde_json new file mode 100755 index 000000000..891f10451 Binary files /dev/null and b/extractor/filesystem/language/rust/cargoauditable/testdata/uses_serde_json/uses_serde_json differ diff --git a/extractor/filesystem/list/list.go b/extractor/filesystem/list/list.go index ae5bcaf4f..1258b6ced 100644 --- a/extractor/filesystem/list/list.go +++ b/extractor/filesystem/list/list.go @@ -56,6 +56,7 @@ import ( "github.com/google/osv-scalibr/extractor/filesystem/language/r/renvlock" "github.com/google/osv-scalibr/extractor/filesystem/language/ruby/gemfilelock" "github.com/google/osv-scalibr/extractor/filesystem/language/ruby/gemspec" + "github.com/google/osv-scalibr/extractor/filesystem/language/rust/cargoauditable" "github.com/google/osv-scalibr/extractor/filesystem/language/rust/cargolock" "github.com/google/osv-scalibr/extractor/filesystem/language/swift/packageresolved" "github.com/google/osv-scalibr/extractor/filesystem/language/swift/podfilelock" @@ -126,7 +127,10 @@ var ( // Ruby extractors. Ruby []filesystem.Extractor = []filesystem.Extractor{gemspec.New(gemspec.DefaultConfig()), &gemfilelock.Extractor{}} // Rust extractors. - Rust []filesystem.Extractor = []filesystem.Extractor{cargolock.Extractor{}} + Rust []filesystem.Extractor = []filesystem.Extractor{ + cargolock.Extractor{}, + cargoauditable.New(cargoauditable.DefaultConfig()), + } // SBOM extractors. SBOM []filesystem.Extractor = []filesystem.Extractor{&cdx.Extractor{}, &spdx.Extractor{}} // Dotnet (.NET) extractors. diff --git a/go.mod b/go.mod index cb4deaec7..72f16d266 100644 --- a/go.mod +++ b/go.mod @@ -15,6 +15,7 @@ require ( github.com/google/uuid v1.6.0 github.com/groob/plist v0.1.1 github.com/mattn/go-sqlite3 v1.14.22 + github.com/microsoft/go-rustaudit v0.0.0-20240820110456-0e2abec02f8b github.com/opencontainers/go-digest v1.0.0 github.com/opencontainers/image-spec v1.1.0 github.com/opencontainers/runtime-spec v1.1.0 diff --git a/go.sum b/go.sum index 7f678a38a..de23e1b58 100644 --- a/go.sum +++ b/go.sum @@ -127,6 +127,8 @@ github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWE github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU= github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= +github.com/microsoft/go-rustaudit v0.0.0-20240820110456-0e2abec02f8b h1:84JbAJpjZ8p1ttV6dpIqfe8IehWMf0i8DPSgmE9aZuA= +github.com/microsoft/go-rustaudit v0.0.0-20240820110456-0e2abec02f8b/go.mod h1:vYT9HE7WCvL64iVeZylKmCsWKfE+JZ8105iuh2Trk8g= github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/moby/locker v1.0.1 h1:fOXqR41zeveg4fFODix+1Ch4mj/gT0NE1XJbp/epuBg=