Skip to content

Commit 86b66d5

Browse files
committed
feat(scancode): Add an option to prefer file- over line-level findings
See [1] for discussions about the `detected_license_expression_spdx` field, in particular that it "is not merely the accumulation of the underlying matches". Optionally making use of this file-level license aligns ORT's behavior with that of the Double Open Scanner (DOS), see [2], which is useful for comparing results.

[1]: aboutcode-org/scancode-toolkit#3458
[2]: https://github.com/doubleopen-project/dos/blob/616c582/apps/api/src/helpers/db_operations.ts#L55-L78

Signed-off-by: Sebastian Schuberth <[email protected]>
1 parent c5be7ae commit 86b66d5

File tree

4 files changed

+57
-14
lines changed

4 files changed

+57
-14
lines changed

plugins/scanners/scancode/src/main/kotlin/ScanCode.kt

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,13 @@ import org.semver4j.Semver
5555
* looking up results from the [ScanResultsStorage]. Defaults to [DEFAULT_CONFIGURATION_OPTIONS].
5656
* * **"commandLineNonConfig":** Command line options that do not modify the result and should therefore not be
5757
* considered in [configuration], like "--processes". Defaults to [DEFAULT_NON_CONFIGURATION_OPTIONS].
58+
* * **"preferFileLicense":** A flag to indicate whether the "high-level" per-file license reported by ScanCode starting
59+
* with version 32 should be used instead of the individual "low-level" per-line license findings. The per-file
60+
* license may be different from the conjunction of per-line licenses and is supposed to contain fewer
61+
* false positives. However, no exact line numbers can be associated with the per-file license anymore. If enabled, the
62+
* start line of the per-file license finding is set to the minimum of all start lines for per-line findings in that
63+
* file, the end line is set to the maximum of all end lines for per-line findings in that file, and the score is set
64+
* to the arithmetic average of the scores of all per-line findings in that file.
5865
*/
5966
class ScanCode internal constructor(
6067
name: String,
@@ -90,7 +97,14 @@ class ScanCode internal constructor(
9097
}
9198
}
9299

93-
override val configuration by lazy { config.commandLine.joinToString(" ") }
100+
override val configuration by lazy {
101+
buildList {
102+
addAll(config.commandLine)
103+
104+
// Add this in the style of a fake command line option for consistency with the above.
105+
if (config.preferFileLicense) add("--prefer-file-license")
106+
}.joinToString(" ")
107+
}
94108

95109
override val matcher by lazy { ScannerMatcher.create(details, wrapperConfig.matcherConfig) }
96110

@@ -140,7 +154,7 @@ class ScanCode internal constructor(
140154
}
141155

142156
override fun createSummary(result: String, startTime: Instant, endTime: Instant): ScanSummary =
143-
parseResult(result).toScanSummary()
157+
parseResult(result).toScanSummary(config.preferFileLicense)
144158

145159
/**
146160
* Execute ScanCode with the configured arguments to scan the given [path] and produce [resultFile].

plugins/scanners/scancode/src/main/kotlin/ScanCodeConfig.kt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ import org.ossreviewtoolkit.utils.common.splitOnWhitespace
2727

2828
data class ScanCodeConfig(
2929
val commandLine: List<String>,
30-
val commandLineNonConfig: List<String>
30+
val commandLineNonConfig: List<String>,
31+
val preferFileLicense: Boolean
3132
) {
3233
companion object {
3334
/**
@@ -60,7 +61,8 @@ data class ScanCodeConfig(
6061
ScanCodeConfig(
6162
options["commandLine"]?.splitOnWhitespace() ?: DEFAULT_COMMAND_LINE_OPTIONS,
6263
options["commandLineNonConfig"]?.splitOnWhitespace()
63-
?: DEFAULT_COMMAND_LINE_NON_CONFIG_OPTIONS
64+
?: DEFAULT_COMMAND_LINE_NON_CONFIG_OPTIONS,
65+
options["preferFileLicense"].toBoolean()
6466
)
6567
}
6668
}

plugins/scanners/scancode/src/main/kotlin/ScanCodeResultModelMapper.kt

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ private data class LicenseMatch(
5858
val score: Float
5959
)
6060

61-
fun ScanCodeResult.toScanSummary(): ScanSummary {
61+
fun ScanCodeResult.toScanSummary(preferFileLicense: Boolean = false): ScanSummary {
6262
val licenseFindings = mutableSetOf<LicenseFinding>()
6363
val copyrightFindings = mutableSetOf<CopyrightFinding>()
6464
val issues = mutableListOf<Issue>()
@@ -91,19 +91,31 @@ fun ScanCodeResult.toScanSummary(): ScanSummary {
9191
it.value.first()
9292
}
9393

94-
licenses.mapTo(licenseFindings) { license ->
95-
// ScanCode uses its own license keys as identifiers in license expressions.
96-
val spdxLicenseExpression = license.licenseExpression.mapLicense(scanCodeKeyToSpdxIdMappings)
97-
98-
LicenseFinding(
99-
license = spdxLicenseExpression,
94+
if (preferFileLicense && file is FileEntry.Version3 && file.detectedLicenseExpressionSpdx != null) {
95+
licenseFindings += LicenseFinding(
96+
license = file.detectedLicenseExpressionSpdx,
10097
location = TextLocation(
10198
path = file.path,
102-
startLine = license.startLine,
103-
endLine = license.endLine
99+
startLine = licenses.minOf { it.startLine },
100+
endLine = licenses.maxOf { it.endLine }
104101
),
105-
score = license.score
102+
score = licenses.map { it.score }.average().toFloat()
106103
)
104+
} else {
105+
licenses.mapTo(licenseFindings) { license ->
106+
// ScanCode uses its own license keys as identifiers in license expressions.
107+
val spdxLicenseExpression = license.licenseExpression.mapLicense(scanCodeKeyToSpdxIdMappings)
108+
109+
LicenseFinding(
110+
license = spdxLicenseExpression,
111+
location = TextLocation(
112+
path = file.path,
113+
startLine = license.startLine,
114+
endLine = license.endLine
115+
),
116+
score = license.score
117+
)
118+
}
107119
}
108120

109121
file.copyrights.mapTo(copyrightFindings) { copyright ->

plugins/scanners/scancode/src/test/kotlin/ScanCodeResultParserTest.kt

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import io.kotest.core.spec.style.FreeSpec
2323
import io.kotest.matchers.Matcher
2424
import io.kotest.matchers.collections.beEmpty
2525
import io.kotest.matchers.collections.containExactlyInAnyOrder
26+
import io.kotest.matchers.collections.shouldContainExactlyInAnyOrder
2627
import io.kotest.matchers.collections.shouldHaveSingleElement
2728
import io.kotest.matchers.collections.shouldHaveSize
2829
import io.kotest.matchers.should
@@ -86,6 +87,20 @@ class ScanCodeResultParserTest : FreeSpec({
8687
?.license.toString() shouldBe "GPL-2.0-only WITH GCC-exception-2.0"
8788
}
8889
}
90+
91+
"get file-level findings with the 'preferFileLicense' option" {
92+
val resultFile = getAssetFile("scancode-32.0.8_spdx-expression-parse_no-license-references.json")
93+
94+
val summary = parseResult(resultFile).toScanSummary(preferFileLicense = true)
95+
96+
summary.licenseFindings.map { it.license.toString() }.shouldContainExactlyInAnyOrder(
97+
"LicenseRef-scancode-generic-cla AND MIT",
98+
"MIT",
99+
"MIT",
100+
"GPL-2.0-only WITH GCC-exception-2.0 AND JSON AND BSD-2-Clause AND CC-BY-3.0 AND MIT",
101+
"GPL-2.0-only WITH GCC-exception-2.0 AND BSD-3-Clause"
102+
)
103+
}
89104
}
90105

91106
for (version in 1..MAX_SUPPORTED_OUTPUT_FORMAT_MAJOR_VERSION) {

0 commit comments

Comments
 (0)