Skip to content

Commit 327672e

Browse files
committed
Merge pull request #421: Sparse index: integrate with the sparse-checkout builtin
This integrates the `sparse-checkout` builtin with the sparse index. The tricky part here is that we need to partially expand the index when we are modifying the sparse-checkout definition. Note that we modify the pattern list in a careful way: we create a `struct pattern_list` in-memory in `builtin/sparse-checkout.c`, then apply those patterns to the index before writing the patterns to the sparse-checkout file. The `update_sparsity()` method does the work to assign the `SKIP_WORKTREE` bit appropriately, but this doesn't work if the files that are within the new sparse-checkout cone are still hidden behind a sparse directory. The new `expand_to_pattern_list()` method does the hard work of expanding the sparse directories that are now within the new patterns. This expands only as far as needed, possibly creating new sparse directory entries. This method does not contract existing files to sparse directories, and a big reason is the check for ignored files that happens as we delete those directories. The `clean_tracked_sparse_directories()` method is called after `update_sparsity()`, but we need to read the `A/B/.gitignore` file (or lack thereof) before we can delete `A/B/`. If we convert to sparse too quickly, then we lose this information and cause a full expansion. Most of the correctness is handled by existing tests in `t1092`, but I add checks for `ensure_not_expanded` in some hopefully interesting cases. As for performance, `git sparse-checkout set` can be slow if it needs to move a lot of files. However, a no-op `git sparse-checkout set` (i.e. set the sparse-checkout cone to only include files at root, and do this repeatedly) has these performance results on Linux in a monorepo with 2+ million files at `HEAD`:

```
Benchmark #1: baseline
  Time (mean ± σ):     10.465 s ±  0.018 s    [User: 9.885 s, System: 0.573 s]
  Range (min … max):   10.450 s … 10.497 s    5 runs

Benchmark #2: new code
  Time (mean ± σ):      68.9 ms ±   2.9 ms    [User: 45.8 ms, System: 17.1 ms]
  Range (min … max):    63.4 ms …  74.0 ms    41 runs

Summary
  'new code' ran
  151.89 ± 6.30 times faster than 'baseline'
```
2 parents 9f14b36 + 7f9b71d commit 327672e

File tree

2 files changed

+53
-34
lines changed

2 files changed

+53
-34
lines changed

dir.c

Lines changed: 44 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1400,46 +1400,16 @@ static struct path_pattern *last_matching_pattern_from_list(const char *pathname
14001400
return res;
14011401
}
14021402

1403-
/*
1404-
* Scan the list of patterns to determine if the ordered list
1405-
* of patterns matches on 'pathname'.
1406-
*
1407-
* Return 1 for a match, 0 for not matched and -1 for undecided.
1408-
*/
1409-
enum pattern_match_result path_matches_pattern_list(
1403+
enum pattern_match_result path_matches_cone_mode_pattern_list(
14101404
const char *pathname, int pathlen,
1411-
const char *basename, int *dtype,
1412-
struct pattern_list *pl,
1413-
struct index_state *istate)
1405+
struct pattern_list *pl)
14141406
{
1415-
struct path_pattern *pattern;
14161407
struct strbuf parent_pathname = STRBUF_INIT;
14171408
int result = NOT_MATCHED;
14181409
size_t slash_pos;
14191410

1420-
/*
1421-
* The virtual file system data is used to prevent git from traversing
1422-
* any part of the tree that is not in the virtual file system. Return
1423-
* 1 to exclude the entry if it is not found in the virtual file system,
1424-
* else fall through to the regular excludes logic as it may further exclude.
1425-
*/
1426-
if (*dtype == DT_UNKNOWN)
1427-
*dtype = resolve_dtype(DT_UNKNOWN, istate, pathname, pathlen);
1428-
if (is_excluded_from_virtualfilesystem(pathname, pathlen, *dtype) > 0)
1429-
return 1;
1430-
1431-
if (!pl->use_cone_patterns) {
1432-
pattern = last_matching_pattern_from_list(pathname, pathlen, basename,
1433-
dtype, pl, istate);
1434-
if (pattern) {
1435-
if (pattern->flags & PATTERN_FLAG_NEGATIVE)
1436-
return NOT_MATCHED;
1437-
else
1438-
return MATCHED;
1439-
}
1440-
1441-
return UNDECIDED;
1442-
}
1411+
if (!pl->use_cone_patterns)
1412+
BUG("path_matches_cone_mode_pattern_list requires cone mode patterns");
14431413

14441414
if (pl->full_cone)
14451415
return MATCHED;
@@ -1492,6 +1462,46 @@ enum pattern_match_result path_matches_pattern_list(
14921462
return result;
14931463
}
14941464

1465+
/*
1466+
* Scan the list of patterns to determine if the ordered list
1467+
* of patterns matches on 'pathname'.
1468+
*
1469+
* Return 1 for a match, 0 for not matched and -1 for undecided.
1470+
*/
1471+
enum pattern_match_result path_matches_pattern_list(
1472+
const char *pathname, int pathlen,
1473+
const char *basename, int *dtype,
1474+
struct pattern_list *pl,
1475+
struct index_state *istate)
1476+
{
1477+
/*
1478+
* The virtual file system data is used to prevent git from traversing
1479+
* any part of the tree that is not in the virtual file system. Return
1480+
* 1 to exclude the entry if it is not found in the virtual file system,
1481+
* else fall through to the regular excludes logic as it may further exclude.
1482+
*/
1483+
if (*dtype == DT_UNKNOWN)
1484+
*dtype = resolve_dtype(DT_UNKNOWN, istate, pathname, pathlen);
1485+
if (is_excluded_from_virtualfilesystem(pathname, pathlen, *dtype) > 0)
1486+
return 1;
1487+
1488+
if (!pl->use_cone_patterns) {
1489+
struct path_pattern *pattern = last_matching_pattern_from_list(
1490+
pathname, pathlen, basename,
1491+
dtype, pl, istate);
1492+
if (pattern) {
1493+
if (pattern->flags & PATTERN_FLAG_NEGATIVE)
1494+
return NOT_MATCHED;
1495+
else
1496+
return MATCHED;
1497+
}
1498+
1499+
return UNDECIDED;
1500+
}
1501+
1502+
return path_matches_cone_mode_pattern_list(pathname, pathlen, pl);
1503+
}
1504+
14951505
int init_sparse_checkout_patterns(struct index_state *istate)
14961506
{
14971507
if (!core_apply_sparse_checkout)

dir.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,15 @@ enum pattern_match_result {
383383
MATCHED_RECURSIVE = 2,
384384
};
385385

386+
/*
387+
* Test if a given path is contained in the given pattern list.
388+
*
389+
* The given pattern list _must_ use cone mode patterns.
390+
*/
391+
enum pattern_match_result path_matches_cone_mode_pattern_list(
392+
const char *pathname, int pathlen,
393+
struct pattern_list *pl);
394+
386395
/*
387396
* Scan the list of patterns to determine if the ordered list
388397
* of patterns matches on 'pathname'.

0 commit comments

Comments
 (0)