Skip to content

Commit c15ce99

Browse files
committed
feat(vcs): Add Git-specific configuration options for submodule handling
For large repositories with many layers of nested Git submodules, the download process can be very time-consuming and often results in duplicate projects in the tree of nested submodules. This feature introduces configuration options to limit the recursive checkout of nested Git submodules to the first layer, optimizing performance and reducing redundancy. Additionally, it allows limiting the depth of the commit history that is fetched when downloading the projects. Signed-off-by: Wolfgang Klenk <[email protected]>
1 parent d352301 commit c15ce99

File tree

5 files changed

+99
-32
lines changed

5 files changed

+99
-32
lines changed

downloader/src/test/kotlin/VersionControlSystemTest.kt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ import org.ossreviewtoolkit.model.Package
3434
import org.ossreviewtoolkit.model.VcsInfo
3535
import org.ossreviewtoolkit.model.VcsType
3636
import org.ossreviewtoolkit.plugins.versioncontrolsystems.git.Git
37+
import org.ossreviewtoolkit.plugins.versioncontrolsystems.git.GitConfiguration
3738
import org.ossreviewtoolkit.utils.common.CommandLineTool
3839

3940
class VersionControlSystemTest : WordSpec({
@@ -87,7 +88,7 @@ class VersionControlSystemTest : WordSpec({
8788

8889
every { workingTree.guessRevisionName(any(), any()) } returns "v1.6.0"
8990

90-
Git.Factory().create(VersionControlSystemConfiguration())
91+
Git.Factory().create(GitConfiguration())
9192
.getRevisionCandidates(workingTree, pkg, allowMovingRevisions = true) shouldBeSuccess listOf(
9293
"v1.6.0"
9394
)
@@ -111,7 +112,7 @@ class VersionControlSystemTest : WordSpec({
111112
every { workingTree.listRemoteBranches() } returns listOf("main")
112113
every { workingTree.listRemoteTags() } returns emptyList()
113114

114-
Git.Factory().create(VersionControlSystemConfiguration())
115+
Git.Factory().create(GitConfiguration())
115116
.getRevisionCandidates(workingTree, pkg, allowMovingRevisions = true) shouldBeSuccess listOf(
116117
"master",
117118
"main"

plugins/version-control-systems/git/src/main/kotlin/Git.kt

Lines changed: 59 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ import java.security.PublicKey
2727

2828
import org.apache.logging.log4j.kotlin.logger
2929

30-
import org.eclipse.jgit.api.Git
30+
import org.eclipse.jgit.api.Git as JGit
3131
import org.eclipse.jgit.api.LsRemoteCommand
3232
import org.eclipse.jgit.api.errors.GitAPIException
3333
import org.eclipse.jgit.errors.UnsupportedCredentialItem
@@ -45,7 +45,6 @@ import org.eclipse.jgit.transport.sshd.ServerKeyDatabase
4545
import org.eclipse.jgit.transport.sshd.SshdSessionFactory
4646

4747
import org.ossreviewtoolkit.downloader.VersionControlSystem
48-
import org.ossreviewtoolkit.downloader.VersionControlSystemConfiguration
4948
import org.ossreviewtoolkit.downloader.VersionControlSystemFactory
5049
import org.ossreviewtoolkit.downloader.WorkingTree
5150
import org.ossreviewtoolkit.model.VcsInfo
@@ -62,9 +61,6 @@ import org.ossreviewtoolkit.utils.ort.showStackTrace
6261
import org.semver4j.RangesList
6362
import org.semver4j.RangesListFactory
6463

65-
// TODO: Make this configurable.
66-
const val GIT_HISTORY_DEPTH = 50
67-
6864
// Replace prefixes of Git submodule repository URLs.
6965
private val REPOSITORY_URL_PREFIX_REPLACEMENTS = listOf(
7066
"git://" to "https://"
@@ -87,8 +83,10 @@ object GitCommand : CommandLineTool {
8783
override fun displayName(): String = "Git"
8884
}
8985

90-
class Git : VersionControlSystem(GitCommand) {
86+
class Git internal constructor(private val vcsConfig: GitConfiguration = GitConfiguration()) :
87+
VersionControlSystem(GitCommand) {
9188
companion object {
89+
9290
init {
9391
// Make sure that JGit uses the exact same authentication information as ORT itself. This addresses
9492
// discrepancies in the way .netrc files are interpreted between JGit's and ORT's implementation.
@@ -122,20 +120,24 @@ class Git : VersionControlSystem(GitCommand) {
122120
override val type = VcsType.GIT.toString()
123121
override val latestRevisionNames = listOf("HEAD", "@")
124122

125-
class Factory : VersionControlSystemFactory<VersionControlSystemConfiguration>(VcsType.GIT.toString(), 100) {
126-
override fun create(config: VersionControlSystemConfiguration): VersionControlSystem {
127-
return Git()
123+
class Factory : VersionControlSystemFactory<GitConfiguration>(VcsType.GIT.toString(), 100) {
124+
override fun create(config: GitConfiguration): VersionControlSystem {
125+
logger.info("Creating Git VCS with configuration $config.")
126+
return Git(config)
128127
}
129128

130-
override fun parseConfig(options: Options, secrets: Options): VersionControlSystemConfiguration {
131-
return VersionControlSystemConfiguration() // No specific Subversion configuration yet.
129+
override fun parseConfig(options: Options, secrets: Options): GitConfiguration {
130+
return GitConfiguration(
131+
submoduleHistoryDepth = options["submoduleHistoryDepth"]?.toIntOrNull() ?: 50,
132+
updateNestedSubmodules = options["updateNestedSubmodules"]?.toBoolean() ?: true
133+
)
132134
}
133135
}
134136

135137
override fun getVersion() = GitCommand.getVersion(null)
136138

137139
override fun getDefaultBranchName(url: String): String {
138-
val refs = Git.lsRemoteRepository().setRemote(url).callAsMap()
140+
val refs = JGit.lsRemoteRepository().setRemote(url).callAsMap()
139141
return (refs["HEAD"] as? SymbolicRef)?.target?.name?.removePrefix("refs/heads/") ?: "master"
140142
}
141143

@@ -150,7 +152,7 @@ class Git : VersionControlSystem(GitCommand) {
150152

151153
override fun initWorkingTree(targetDir: File, vcs: VcsInfo): WorkingTree {
152154
try {
153-
Git.init().setDirectory(targetDir).call().use { git ->
155+
JGit.init().setDirectory(targetDir).call().use { git ->
154156
git.remoteAdd().setName("origin").setUri(URIish(vcs.url)).call()
155157

156158
if (Os.isWindows) {
@@ -185,12 +187,21 @@ class Git : VersionControlSystem(GitCommand) {
185187
recursive: Boolean
186188
): Result<String> =
187189
(workingTree as GitWorkingTree).useRepo {
188-
Git(this).use { git ->
190+
JGit(this).use { git ->
189191
logger.info { "Updating working tree from ${workingTree.getRemoteUrl()}." }
190192

191-
updateWorkingTreeWithoutSubmodules(workingTree, git, revision).mapCatching {
193+
val historyDepth = vcsConfig.submoduleHistoryDepth
194+
updateWorkingTreeWithoutSubmodules(workingTree, git, revision, historyDepth).mapCatching {
192195
// In case this throws the exception gets encapsulated as a failure.
193-
if (recursive) updateSubmodules(workingTree)
196+
if (recursive) {
197+
val updateNestedSubmodules =
198+
vcsConfig.updateNestedSubmodules
199+
updateSubmodules(
200+
workingTree,
201+
recursive = updateNestedSubmodules,
202+
historyDepth = historyDepth
203+
)
204+
}
194205

195206
revision
196207
}
@@ -199,13 +210,14 @@ class Git : VersionControlSystem(GitCommand) {
199210

200211
private fun updateWorkingTreeWithoutSubmodules(
201212
workingTree: WorkingTree,
202-
git: Git,
203-
revision: String
213+
git: JGit,
214+
revision: String,
215+
historyDepth: Int
204216
): Result<String> =
205217
runCatching {
206-
logger.info { "Trying to fetch only revision '$revision' with depth limited to $GIT_HISTORY_DEPTH." }
218+
logger.info { "Trying to fetch only revision '$revision' with depth limited to $historyDepth." }
207219

208-
val fetch = git.fetch().setDepth(GIT_HISTORY_DEPTH)
220+
val fetch = git.fetch().setDepth(historyDepth)
209221

210222
// See https://git-scm.com/docs/gitrevisions#_specifying_revisions for how Git resolves ambiguous
211223
// names. In particular, tag names have higher precedence than branch names.
@@ -223,13 +235,13 @@ class Git : VersionControlSystem(GitCommand) {
223235
it.showStackTrace()
224236

225237
logger.info { "Could not fetch only revision '$revision': ${it.collectMessages()}" }
226-
logger.info { "Falling back to fetching all refs with depth limited to $GIT_HISTORY_DEPTH." }
238+
logger.info { "Falling back to fetching all refs with depth limited to $historyDepth." }
227239

228-
git.fetch().setDepth(GIT_HISTORY_DEPTH).setTagOpt(TagOpt.FETCH_TAGS).call()
240+
git.fetch().setDepth(historyDepth).setTagOpt(TagOpt.FETCH_TAGS).call()
229241
}.recoverCatching {
230242
it.showStackTrace()
231243

232-
logger.info { "Could not fetch with only a depth of $GIT_HISTORY_DEPTH: ${it.collectMessages()}" }
244+
logger.info { "Could not fetch with only a depth of $historyDepth: ${it.collectMessages()}" }
233245
logger.info { "Falling back to fetch everything including tags." }
234246

235247
git.fetch().setUnshallow(true).setTagOpt(TagOpt.FETCH_TAGS).call()
@@ -284,7 +296,14 @@ class Git : VersionControlSystem(GitCommand) {
284296
revision
285297
}
286298

287-
private fun updateSubmodules(workingTree: WorkingTree) {
299+
/**
300+
* Initialize, update, and clone all the submodules in a working tree.
301+
*
302+
* If [recursive] is set to true, then the operations are not only performed on the
303+
* submodules in the top-level of the working tree, but also on the submodules of the submodules, and so on.
304+
* If [recursive] is set to false, only the submodules on the top-level are initialized, updated, and cloned.
305+
*/
306+
private fun updateSubmodules(workingTree: WorkingTree, recursive: Boolean, historyDepth: Int) {
288307
if (!workingTree.getRootPath().resolve(".gitmodules").isFile) return
289308

290309
val insteadOf = REPOSITORY_URL_PREFIX_REPLACEMENTS.map { (prefix, replacement) ->
@@ -293,14 +312,27 @@ class Git : VersionControlSystem(GitCommand) {
293312

294313
runCatching {
295314
// TODO: Migrate this to JGit once https://bugs.eclipse.org/bugs/show_bug.cgi?id=580731 is implemented.
296-
workingTree.runGit("submodule", "update", "--init", "--recursive", "--depth", "$GIT_HISTORY_DEPTH")
315+
val updateArgs = mutableListOf("submodule", "update", "--init", "--depth", "$historyDepth").apply {
316+
if (recursive) { add("--recursive") }
317+
}
318+
319+
workingTree.runGit(*updateArgs.toTypedArray())
297320

298321
insteadOf.forEach {
299-
workingTree.runGit("submodule", "foreach", "--recursive", "git config $it")
322+
val foreachArgs = mutableListOf("submodule", "foreach").apply {
323+
if (recursive) { add("--recursive") }
324+
add("git config $it")
325+
}
326+
327+
workingTree.runGit(*foreachArgs.toTypedArray())
300328
}
301329
}.recover {
302330
// As Git's dumb HTTP transport does not support shallow capabilities, also try to not limit the depth.
303-
workingTree.runGit("submodule", "update", "--recursive")
331+
val fallbackArgs = mutableListOf("submodule", "update").apply {
332+
if (recursive) { add("--recursive") }
333+
}
334+
335+
workingTree.runGit(*fallbackArgs.toTypedArray())
304336
}
305337
}
306338

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/*
2+
* Copyright (C) 2024 The ORT Project Authors (see <https://github.com/oss-review-toolkit/ort/blob/main/NOTICE>)
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*
16+
* SPDX-License-Identifier: Apache-2.0
17+
* License-Filename: LICENSE
18+
*/
19+
package org.ossreviewtoolkit.plugins.versioncontrolsystems.git
20+
21+
import org.ossreviewtoolkit.downloader.VersionControlSystemConfiguration
22+
23+
data class GitConfiguration(
24+
/**
25+
 * Depth of the commit history to fetch when updating the working tree and its submodules.
26+
*/
27+
val submoduleHistoryDepth: Int = 50,
28+
29+
/**
30+
* Whether nested submodules should be updated, or if only the submodules
31+
* on the first layer should be considered.
32+
*/
33+
val updateNestedSubmodules: Boolean = true
34+
) : VersionControlSystemConfiguration()

plugins/version-control-systems/git/src/main/kotlin/GitRepo.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ class GitRepo internal constructor() : VersionControlSystem(GitRepoCommand) {
150150

151151
paths.forEach { path ->
152152
// Add the nested Repo project.
153-
val workingTree = Git.Factory().create(VersionControlSystemConfiguration())
153+
val workingTree = Git.Factory().create(GitConfiguration())
154154
.getWorkingTree(getRootPath().resolve(path))
155155
nested[path] = workingTree.getInfo()
156156

utils/common/src/funTest/kotlin/SafeDeleteRecursivelyFunTest.kt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,11 @@ import io.kotest.matchers.shouldBe
2626

2727
import java.io.IOException
2828

29-
import org.ossreviewtoolkit.downloader.VersionControlSystemConfiguration
3029
import org.ossreviewtoolkit.model.Package
3130
import org.ossreviewtoolkit.model.VcsInfo
3231
import org.ossreviewtoolkit.model.VcsType
3332
import org.ossreviewtoolkit.plugins.versioncontrolsystems.git.Git
33+
import org.ossreviewtoolkit.plugins.versioncontrolsystems.git.GitConfiguration
3434

3535
class SafeDeleteRecursivelyFunTest : WordSpec({
3636
"File.safeDeleteRecursively()" should {
@@ -59,7 +59,7 @@ class SafeDeleteRecursivelyFunTest : WordSpec({
5959
)
6060

6161
val nodeDir = tempdir().resolve("node-dir")
62-
Git.Factory().create(VersionControlSystemConfiguration())
62+
Git.Factory().create(GitConfiguration())
6363
.download(pkg, nodeDir)
6464

6565
shouldNotThrow<IOException> {

0 commit comments

Comments
 (0)