diff --git a/model/src/main/resources/reference.yml b/model/src/main/resources/reference.yml index fedd2b1dd4268..c1b7c2b5ca1eb 100644 --- a/model/src/main/resources/reference.yml +++ b/model/src/main/resources/reference.yml @@ -229,6 +229,9 @@ ort: # Command line options that do not affect the ScanCode output. commandLineNonConfig: '--processes 4' + # Use per-file license findings instead of per-line ones. + preferFileLicense: false + # Criteria for matching stored scan results. These can be configured for any scanner that uses semantic # versioning. Note that the 'maxVersion' is exclusive and not part of the range of accepted versions. minVersion: '3.2.1-rc2' diff --git a/model/src/test/kotlin/config/OrtConfigurationTest.kt b/model/src/test/kotlin/config/OrtConfigurationTest.kt index c5439b95cae05..5daa5a4880964 100644 --- a/model/src/test/kotlin/config/OrtConfigurationTest.kt +++ b/model/src/test/kotlin/config/OrtConfigurationTest.kt @@ -255,6 +255,7 @@ class OrtConfigurationTest : WordSpec({ options shouldContainExactly mapOf( "commandLine" to "--copyright --license --info --strip-root --timeout 300", "commandLineNonConfig" to "--processes 4", + "preferFileLicense" to "false", "minVersion" to "3.2.1-rc2", "maxVersion" to "32.0.0" ) diff --git a/plugins/scanners/scancode/src/funTest/kotlin/ScanCodeScannerFunTest.kt b/plugins/scanners/scancode/src/funTest/kotlin/ScanCodeScannerFunTest.kt index ee894e7f2bd13..9ec252fe41f92 100644 --- a/plugins/scanners/scancode/src/funTest/kotlin/ScanCodeScannerFunTest.kt +++ b/plugins/scanners/scancode/src/funTest/kotlin/ScanCodeScannerFunTest.kt @@ -33,7 +33,7 @@ import org.ossreviewtoolkit.utils.spdx.getLicenseText import org.ossreviewtoolkit.utils.test.ExpensiveTag class ScanCodeScannerFunTest : AbstractPathScannerWrapperFunTest(setOf(ExpensiveTag)) { - override val scanner = ScanCode("ScanCode", ScanCodeConfig.EMPTY, ScannerWrapperConfig.EMPTY) + override val scanner = ScanCode("ScanCode", ScanCodeConfig.DEFAULT, 
ScannerWrapperConfig.EMPTY) override val expectedFileLicenses = listOf( LicenseFinding("Apache-2.0", TextLocation("LICENSE", 1, 187), 100.0f), diff --git a/plugins/scanners/scancode/src/main/kotlin/ScanCode.kt b/plugins/scanners/scancode/src/main/kotlin/ScanCode.kt index e06d18727f903..ec0bf1478b18a 100644 --- a/plugins/scanners/scancode/src/main/kotlin/ScanCode.kt +++ b/plugins/scanners/scancode/src/main/kotlin/ScanCode.kt @@ -22,8 +22,6 @@ package org.ossreviewtoolkit.plugins.scanners.scancode import java.io.File import java.time.Instant -import kotlin.math.max - import org.apache.logging.log4j.kotlin.logger import org.ossreviewtoolkit.model.ScanSummary @@ -40,7 +38,6 @@ import org.ossreviewtoolkit.utils.common.Options import org.ossreviewtoolkit.utils.common.Os import org.ossreviewtoolkit.utils.common.ProcessCapture import org.ossreviewtoolkit.utils.common.safeDeleteRecursively -import org.ossreviewtoolkit.utils.common.splitOnWhitespace import org.ossreviewtoolkit.utils.common.withoutPrefix import org.ossreviewtoolkit.utils.ort.createOrtTempDir @@ -55,49 +52,30 @@ import org.semver4j.Semver * configuration [options][PluginConfiguration.options]: * * * **"commandLine":** Command line options that modify the result. These are added to the [ScannerDetails] when - * looking up results from the [ScanResultsStorage]. Defaults to [DEFAULT_CONFIGURATION_OPTIONS]. + * looking up results from the [ScanResultsStorage]. Defaults to [ScanCodeConfig.DEFAULT_COMMAND_LINE_OPTIONS]. * * **"commandLineNonConfig":** Command line options that do not modify the result and should therefore not be - * considered in [configuration], like "--processes". Defaults to [DEFAULT_NON_CONFIGURATION_OPTIONS]. + * considered in [configuration], like "--processes". Defaults to + * [ScanCodeConfig.DEFAULT_COMMAND_LINE_NON_CONFIG_OPTIONS]. 
+ * * **"preferFileLicense":** A flag to indicate whether the "high-level" per-file license reported by ScanCode starting + * with version 32 should be used instead of the individual "low-level" per-line license findings. The per-file + * license may be different from the conjunction of per-line licenses and is supposed to contain fewer + * false positives. However, no exact line numbers can be associated to the per-file license anymore. If enabled, the + * start line of the per-file license finding is set to the minimum of all start lines for per-line findings in that + * file, the end line is set to the maximum of all end lines for per-line findings in that file, and the score is set + * to the arithmetic average of the scores of all per-line findings in that file. */ class ScanCode internal constructor( name: String, - config: ScanCodeConfig, + private val config: ScanCodeConfig, private val wrapperConfig: ScannerWrapperConfig ) : CommandLinePathScannerWrapper(name) { // This constructor is required by the `RequirementsCommand`. - constructor(name: String, wrapperConfig: ScannerWrapperConfig) : this(name, ScanCodeConfig.EMPTY, wrapperConfig) + constructor(name: String, wrapperConfig: ScannerWrapperConfig) : this(name, ScanCodeConfig.DEFAULT, wrapperConfig) companion object { const val SCANNER_NAME = "ScanCode" private const val LICENSE_REFERENCES_OPTION_VERSION = "32.0.0" - private const val OUTPUT_FORMAT = "json-pp" - private const val TIMEOUT = 300 - - /** - * Configuration options that are relevant for [configuration] because they change the result file. - */ - private val DEFAULT_CONFIGURATION_OPTIONS = listOf( - "--copyright", - "--license", - "--info", - "--strip-root", - "--timeout", TIMEOUT.toString() - ) - - /** - * Configuration options that are not relevant for [configuration] because they do not change the result - * file. 
- */ - private val DEFAULT_NON_CONFIGURATION_OPTIONS = listOf( - "--processes", max(1, Runtime.getRuntime().availableProcessors() - 1).toString() - ) - - private val OUTPUT_FORMAT_OPTION = if (OUTPUT_FORMAT.startsWith("json")) { - "--$OUTPUT_FORMAT" - } else { - "--output-$OUTPUT_FORMAT" - } } class Factory : ScannerWrapperFactory(SCANNER_NAME) { @@ -107,27 +85,12 @@ class ScanCode internal constructor( override fun parseConfig(options: Options, secrets: Options) = ScanCodeConfig.create(options) } - override val matcher by lazy { ScannerMatcher.create(details, wrapperConfig.matcherConfig) } - - override val readFromStorage by lazy { wrapperConfig.readFromStorageWithDefault(matcher) } - - override val writeToStorage by lazy { wrapperConfig.writeToStorageWithDefault(matcher) } - - override val configuration by lazy { - buildList { - addAll(configurationOptions) - add(OUTPUT_FORMAT_OPTION) - }.joinToString(" ") - } - - private val configurationOptions = config.commandLine?.splitOnWhitespace() ?: DEFAULT_CONFIGURATION_OPTIONS - private val nonConfigurationOptions = config.commandLineNonConfig?.splitOnWhitespace() - ?: DEFAULT_NON_CONFIGURATION_OPTIONS + private val commandLineOptions by lazy { getCommandLineOptions(version) } internal fun getCommandLineOptions(version: String) = buildList { - addAll(configurationOptions) - addAll(nonConfigurationOptions) + addAll(config.commandLine) + addAll(config.commandLineNonConfig) if (Semver(version).isGreaterThanOrEqualTo(LICENSE_REFERENCES_OPTION_VERSION)) { // Required to be able to map ScanCode license keys to SPDX IDs. @@ -135,7 +98,20 @@ class ScanCode internal constructor( } } - val commandLineOptions by lazy { getCommandLineOptions(version) } + override val configuration by lazy { + buildList { + addAll(config.commandLine) + + // Add this in the style of a fake command line option for consistency with the above. 
+ if (config.preferFileLicense) add("--prefer-file-license") + }.joinToString(" ") + } + + override val matcher by lazy { ScannerMatcher.create(details, wrapperConfig.matcherConfig) } + + override val readFromStorage by lazy { wrapperConfig.readFromStorageWithDefault(matcher) } + + override val writeToStorage by lazy { wrapperConfig.writeToStorageWithDefault(matcher) } override fun command(workingDir: File?) = listOfNotNull(workingDir, if (Os.isWindows) "scancode.bat" else "scancode").joinToString(File.separator) @@ -179,7 +155,7 @@ class ScanCode internal constructor( } override fun createSummary(result: String, startTime: Instant, endTime: Instant): ScanSummary = - parseResult(result).toScanSummary() + parseResult(result).toScanSummary(config.preferFileLicense) /** * Execute ScanCode with the configured arguments to scan the given [path] and produce [resultFile]. @@ -188,8 +164,8 @@ class ScanCode internal constructor( ProcessCapture( command(), *commandLineOptions.toTypedArray(), - path.absolutePath, - OUTPUT_FORMAT_OPTION, - resultFile.absolutePath + // The output format option needs to directly precede the result file path. + "--json-pp", resultFile.absolutePath, + path.absolutePath ) } diff --git a/plugins/scanners/scancode/src/main/kotlin/ScanCodeConfig.kt b/plugins/scanners/scancode/src/main/kotlin/ScanCodeConfig.kt index 715dc394396b2..0a7e5c216d5c7 100644 --- a/plugins/scanners/scancode/src/main/kotlin/ScanCodeConfig.kt +++ b/plugins/scanners/scancode/src/main/kotlin/ScanCodeConfig.kt @@ -19,19 +19,49 @@ package org.ossreviewtoolkit.plugins.scanners.scancode +import kotlin.math.max +import kotlin.time.Duration.Companion.minutes + import org.ossreviewtoolkit.utils.common.Options +import org.ossreviewtoolkit.utils.common.splitOnWhitespace data class ScanCodeConfig( - val commandLine: String?, - val commandLineNonConfig: String? 
+ val commandLine: List, + val commandLineNonConfig: List, + val preferFileLicense: Boolean ) { companion object { - val EMPTY = ScanCodeConfig(null, null) + /** + * The default time after which scanning a file is aborted. + */ + private val DEFAULT_TIMEOUT = 5.minutes + + /** + * The default list of command line options that might have an impact on the scan results. + */ + private val DEFAULT_COMMAND_LINE_OPTIONS = listOf( + "--copyright", + "--license", + "--info", + "--strip-root", + "--timeout", "${DEFAULT_TIMEOUT.inWholeSeconds}" + ) + + /** + * The default list of command line options that cannot have an impact on the scan results. + */ + private val DEFAULT_COMMAND_LINE_NON_CONFIG_OPTIONS = listOf( + "--processes", max(1, Runtime.getRuntime().availableProcessors() - 1).toString() + ) - private const val COMMAND_LINE_PROPERTY = "commandLine" - private const val COMMAND_LINE_NON_CONFIG_PROPERTY = "commandLineNonConfig" + val DEFAULT = create(emptyMap()) fun create(options: Options) = - ScanCodeConfig(options[COMMAND_LINE_PROPERTY], options[COMMAND_LINE_NON_CONFIG_PROPERTY]) + ScanCodeConfig( + options["commandLine"]?.splitOnWhitespace() ?: DEFAULT_COMMAND_LINE_OPTIONS, + options["commandLineNonConfig"]?.splitOnWhitespace() + ?: DEFAULT_COMMAND_LINE_NON_CONFIG_OPTIONS, + options["preferFileLicense"].toBoolean() + ) } } diff --git a/plugins/scanners/scancode/src/main/kotlin/ScanCodeResultModelMapper.kt b/plugins/scanners/scancode/src/main/kotlin/ScanCodeResultModelMapper.kt index 4f925e9b9a45e..6ec839b9880ae 100644 --- a/plugins/scanners/scancode/src/main/kotlin/ScanCodeResultModelMapper.kt +++ b/plugins/scanners/scancode/src/main/kotlin/ScanCodeResultModelMapper.kt @@ -58,7 +58,7 @@ private data class LicenseMatch( val score: Float ) -fun ScanCodeResult.toScanSummary(): ScanSummary { +fun ScanCodeResult.toScanSummary(preferFileLicense: Boolean = false): ScanSummary { val licenseFindings = mutableSetOf() val copyrightFindings = mutableSetOf() val issues = 
mutableListOf() @@ -91,19 +91,31 @@ fun ScanCodeResult.toScanSummary(): ScanSummary { it.value.first() } - licenses.mapTo(licenseFindings) { license -> - // ScanCode uses its own license keys as identifiers in license expressions. - val spdxLicenseExpression = license.licenseExpression.mapLicense(scanCodeKeyToSpdxIdMappings) - - LicenseFinding( - license = spdxLicenseExpression, + if (preferFileLicense && file is FileEntry.Version3 && file.detectedLicenseExpressionSpdx != null) { + licenseFindings += LicenseFinding( + license = file.detectedLicenseExpressionSpdx, location = TextLocation( path = file.path, - startLine = license.startLine, - endLine = license.endLine + startLine = licenses.minOf { it.startLine }, + endLine = licenses.maxOf { it.endLine } ), - score = license.score + score = licenses.map { it.score }.average().toFloat() ) + } else { + licenses.mapTo(licenseFindings) { license -> + // ScanCode uses its own license keys as identifiers in license expressions. + val spdxLicenseExpression = license.licenseExpression.mapLicense(scanCodeKeyToSpdxIdMappings) + + LicenseFinding( + license = spdxLicenseExpression, + location = TextLocation( + path = file.path, + startLine = license.startLine, + endLine = license.endLine + ), + score = license.score + ) + } } file.copyrights.mapTo(copyrightFindings) { copyright -> diff --git a/plugins/scanners/scancode/src/test/kotlin/ScanCodeResultParserTest.kt b/plugins/scanners/scancode/src/test/kotlin/ScanCodeResultParserTest.kt index 92faf13153bee..3828200795f54 100644 --- a/plugins/scanners/scancode/src/test/kotlin/ScanCodeResultParserTest.kt +++ b/plugins/scanners/scancode/src/test/kotlin/ScanCodeResultParserTest.kt @@ -23,6 +23,7 @@ import io.kotest.core.spec.style.FreeSpec import io.kotest.matchers.Matcher import io.kotest.matchers.collections.beEmpty import io.kotest.matchers.collections.containExactlyInAnyOrder +import io.kotest.matchers.collections.shouldContainExactlyInAnyOrder import 
io.kotest.matchers.collections.shouldHaveSingleElement import io.kotest.matchers.collections.shouldHaveSize import io.kotest.matchers.should @@ -80,9 +81,25 @@ class ScanCodeResultParserTest : FreeSpec({ val summary = parseResult(resultFile).toScanSummary() - summary.licenseFindings.find { - it.location == TextLocation("README.md", 100) && it.score == 100.0f - }?.license.toString() shouldBe "GPL-2.0-only WITH GCC-exception-2.0" + with(summary.licenseFindings) { + shouldHaveSize(18) + find { it.location == TextLocation("README.md", 100) && it.score == 100.0f } + ?.license.toString() shouldBe "GPL-2.0-only WITH GCC-exception-2.0" + } + } + + "get file-level findings with the 'preferFileLicense' option" { + val resultFile = getAssetFile("scancode-32.0.8_spdx-expression-parse_no-license-references.json") + + val summary = parseResult(resultFile).toScanSummary(preferFileLicense = true) + + summary.licenseFindings.map { it.license.toString() }.shouldContainExactlyInAnyOrder( + "LicenseRef-scancode-generic-cla AND MIT", + "MIT", + "MIT", + "GPL-2.0-only WITH GCC-exception-2.0 AND JSON AND BSD-2-Clause AND CC-BY-3.0 AND MIT", + "GPL-2.0-only WITH GCC-exception-2.0 AND BSD-3-Clause" + ) } } diff --git a/plugins/scanners/scancode/src/test/kotlin/ScanCodeTest.kt b/plugins/scanners/scancode/src/test/kotlin/ScanCodeTest.kt index 271af10181f07..41cea589096b3 100644 --- a/plugins/scanners/scancode/src/test/kotlin/ScanCodeTest.kt +++ b/plugins/scanners/scancode/src/test/kotlin/ScanCodeTest.kt @@ -39,24 +39,24 @@ import org.ossreviewtoolkit.scanner.ScannerWrapperConfig import org.ossreviewtoolkit.utils.common.ProcessCapture class ScanCodeTest : WordSpec({ - val scanner = ScanCode("ScanCode", ScanCodeConfig.EMPTY, ScannerWrapperConfig.EMPTY) + val scanner = ScanCode("ScanCode", ScanCodeConfig.DEFAULT, ScannerWrapperConfig.EMPTY) "configuration" should { "return the default values if the scanner configuration is empty" { - scanner.configuration shouldBe "--copyright --license --info 
--strip-root --timeout 300 --json-pp" + scanner.configuration shouldBe "--copyright --license --info --strip-root --timeout 300" } "return the non-config values from the scanner configuration" { - val scannerWithConfig = ScanCode( - "ScanCode", - ScanCodeConfig( - commandLine = "--command --line", - commandLineNonConfig = "--commandLineNonConfig" - ), - ScannerWrapperConfig.EMPTY + val config = ScanCodeConfig.create( + mapOf( + "commandLine" to "--command --line", + "commandLineNonConfig" to "--commandLineNonConfig" + ) ) - scannerWithConfig.configuration shouldBe "--command --line --json-pp" + val scannerWithConfig = ScanCode("ScanCode", config, ScannerWrapperConfig.EMPTY) + + scannerWithConfig.configuration shouldBe "--command --line" } } @@ -69,29 +69,29 @@ class ScanCodeTest : WordSpec({ } "contain the values from the scanner configuration" { - val scannerWithConfig = ScanCode( - "ScanCode", - ScanCodeConfig( - commandLine = "--command --line", - commandLineNonConfig = "--commandLineNonConfig" - ), - ScannerWrapperConfig.EMPTY + val config = ScanCodeConfig.create( + mapOf( + "commandLine" to "--command --line", + "commandLineNonConfig" to "--commandLineNonConfig" + ) ) + val scannerWithConfig = ScanCode("ScanCode", config, ScannerWrapperConfig.EMPTY) + scannerWithConfig.getCommandLineOptions("31.2.4").joinToString(" ") shouldBe "--command --line --commandLineNonConfig" } "be handled correctly when containing multiple spaces" { - val scannerWithConfig = ScanCode( - "ScanCode", - ScanCodeConfig( - commandLine = " --command --line ", - commandLineNonConfig = " -n -c " - ), - ScannerWrapperConfig.EMPTY + val config = ScanCodeConfig.create( + mapOf( + "commandLine" to " --command --line ", + "commandLineNonConfig" to " -n -c " + ) ) + val scannerWithConfig = ScanCode("ScanCode", config, ScannerWrapperConfig.EMPTY) + scannerWithConfig.getCommandLineOptions("31.2.4") shouldBe listOf("--command", "--line", "-n", "-c") } }