From d42a87a0fcf528297e806b70b4d2f78492ab36c8 Mon Sep 17 00:00:00 2001 From: Oliver Heger Date: Thu, 20 Jun 2024 08:47:13 +0200 Subject: [PATCH] fix(scanner): Store only distinct results of package scanners When calling storage writers to store provenance scan results produced by package scanners, make sure that writers are invoked only once for each distinct combination of provenance and scan result. Before this change, if there were multiple root projects in a repository, the writers were called for each of them with the same scan result. This could lead to duplication when storing results. Signed-off-by: Oliver Heger --- scanner/src/main/kotlin/Scanner.kt | 24 +++++++++++---------- scanner/src/test/kotlin/ScannerTest.kt | 30 ++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 11 deletions(-) diff --git a/scanner/src/main/kotlin/Scanner.kt b/scanner/src/main/kotlin/Scanner.kt index f9ce8dc3a4c9d..d5a1b979d7fd1 100644 --- a/scanner/src/main/kotlin/Scanner.kt +++ b/scanner/src/main/kotlin/Scanner.kt @@ -356,15 +356,27 @@ class Scanner( "Finished scan of ${nestedProvenance.root} with package scanner '${scanner.name}'." } + val provenanceScanResultsToStore = mutableSetOf>() packagesWithIncompleteScanResult.forEach processResults@{ pkg -> val nestedProvenanceScanResult = scanResult.toNestedProvenanceScanResult(nestedProvenance) controller.addNestedScanResult(scanner, nestedProvenanceScanResult) // TODO: Run in coroutine. if (scanner.writeToStorage) { - storeNestedScanResult(pkg, nestedProvenanceScanResult) + storePackageScanResult(pkg, nestedProvenanceScanResult) + + nestedProvenanceScanResult.scanResults.forEach { (provenance, scanResults) -> + scanResults.forEach { scanResult -> + provenanceScanResultsToStore += provenance to scanResult + } + } } } + + // Store only deduplicated provenance scan results. + provenanceScanResultsToStore.forEach { (provenance, scanResult) -> + storeProvenanceScanResult(provenance, scanResult) + } } } } @@ -601,16 +613,6 @@ class Scanner( } } - private fun storeNestedScanResult(pkg: Package, nestedProvenanceScanResult: NestedProvenanceScanResult) { - storePackageScanResult(pkg, nestedProvenanceScanResult) - - nestedProvenanceScanResult.scanResults.forEach { (provenance, scanResults) -> - scanResults.forEach { scanResult -> - storeProvenanceScanResult(provenance, scanResult) - } - } - } - private fun storeProvenanceScanResult(provenance: KnownProvenance, scanResult: ScanResult) { storageWriters.filterIsInstance().forEach { writer -> runCatching { diff --git a/scanner/src/test/kotlin/ScannerTest.kt b/scanner/src/test/kotlin/ScannerTest.kt index 6b22bbbf50f43..5fade0e9c1adf 100644 --- a/scanner/src/test/kotlin/ScannerTest.kt +++ b/scanner/src/test/kotlin/ScannerTest.kt @@ -22,6 +22,7 @@ package org.ossreviewtoolkit.scanner import io.kotest.assertions.throwables.shouldThrow import io.kotest.core.spec.style.WordSpec import io.kotest.matchers.collections.beEmpty +import io.kotest.matchers.collections.shouldContainExactly import io.kotest.matchers.maps.beEmpty as beEmptyMap import io.kotest.matchers.maps.containExactly import io.kotest.matchers.nulls.shouldNotBeNull @@ -178,6 +179,35 @@ class ScannerTest : WordSpec({ verify(exactly = 0) { provenanceDownloader.download(any()) } } + + "store only a single scan result per provenance" { + val packageScanner = FakePackageScannerWrapper() + + val pkgWithVcs1 = Package.new(name = "project1").withValidVcs() + val pkgWithVcs2 = Package.new(name = "project2").withValidVcs() + val pkgWithVcs3 = Package.new(name = "project3").withValidVcs() + val packageScannerResult = createScanResult(pkgWithVcs1.repositoryProvenance(), packageScanner.details) + + val scannerWrapper = spyk(packageScanner) { + every { scanPackage(any(), any()) } returns packageScannerResult + } + + val storedScanResults = mutableListOf() + val writer = object : ProvenanceBasedScanStorageWriter { + override fun write(scanResult: ScanResult) { + storedScanResults.add(scanResult) + } + } + + val scanner = createScanner( + packageScannerWrappers = listOf(scannerWrapper), + storageWriters = listOf(writer) + ) + + scanner.scan(setOf(pkgWithVcs1, pkgWithVcs2, pkgWithVcs3), createContext()) + + storedScanResults shouldContainExactly listOf(packageScannerResult) + } } "Scanning with a provenance scanner" should {