Skip to content

Commit 9c1830c

Browse files
committed
do not recurse into trees during fsck
1 parent 7a88b42 commit 9c1830c

File tree

1 file changed

+38
-92
lines changed

1 file changed

+38
-92
lines changed

gix-fsck/src/lib.rs

Lines changed: 38 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,7 @@
44
use gix_hash::ObjectId;
55
use gix_hashtable::HashSet;
66
use gix_object::{tree::EntryMode, Exists, FindExt, Kind};
7-
use std::cell::RefCell;
8-
use std::ops::{Deref, DerefMut};
7+
use std::collections::VecDeque;
98

109
/// Perform a connectivity check.
1110
pub struct Connectivity<T, F>
@@ -18,9 +17,9 @@ where
1817
/// Closure to invoke when a missing object is encountered
1918
missing_cb: F,
2019
/// Set of Object IDs already (or about to be) scanned during the check
21-
oid_set: HashSet,
22-
/// A free-list of buffers for recursive tree decoding.
23-
free_list: FreeList,
20+
seen: HashSet,
21+
/// A buffer to keep a single object at a time.
22+
buf: Vec<u8>,
2423
}
2524

2625
impl<T, F> Connectivity<T, F>
@@ -33,8 +32,8 @@ where
3332
Connectivity {
3433
db,
3534
missing_cb,
36-
oid_set: HashSet::default(),
37-
free_list: Default::default(),
35+
seen: HashSet::default(),
36+
buf: Default::default(),
3837
}
3938
}
4039

@@ -49,113 +48,60 @@ where
4948
/// - TODO: consider how to handle a missing commit (invoke `missing_cb`, or possibly return a Result?)
5049
pub fn check_commit(&mut self, oid: &ObjectId) -> Result<(), gix_object::find::existing_object::Error> {
5150
// Attempt to insert the commit ID in the set, and if already present, return immediately
52-
if !self.oid_set.insert(*oid) {
51+
if !self.seen.insert(*oid) {
5352
return Ok(());
5453
}
5554
// Obtain the commit's tree ID
5655
let tree_id = {
57-
let mut buf = self.free_list.buf();
58-
let commit = self.db.find_commit(oid, &mut buf)?;
56+
let commit = self.db.find_commit(oid, &mut self.buf)?;
5957
commit.tree()
6058
};
6159

62-
if self.oid_set.insert(tree_id) {
63-
check_tree(
64-
&tree_id,
65-
&self.db,
66-
&mut self.free_list,
67-
&mut self.missing_cb,
68-
&mut self.oid_set,
69-
);
60+
let mut tree_ids = VecDeque::from_iter(Some(tree_id));
61+
while let Some(tree_id) = tree_ids.pop_front() {
62+
if self.seen.insert(tree_id) {
63+
self.check_tree(&tree_id, &mut tree_ids);
64+
}
7065
}
7166

7267
Ok(())
7368
}
74-
}
7569

76-
#[derive(Default)]
77-
struct FreeList(RefCell<Vec<Vec<u8>>>);
78-
79-
impl FreeList {
80-
fn buf(&self) -> ReturnToFreeListOnDrop<'_> {
81-
let buf = self.0.borrow_mut().pop().unwrap_or_default();
82-
ReturnToFreeListOnDrop { buf, list: &self.0 }
83-
}
84-
}
85-
86-
struct ReturnToFreeListOnDrop<'a> {
87-
list: &'a RefCell<Vec<Vec<u8>>>,
88-
buf: Vec<u8>,
89-
}
70+
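/// Blobs are checked for existence right away. Trees that have not been `seen` yet are marked as seen and
71+
/// pushed onto `tree_ids`, leaving it to the caller to iterate them instead of recursing here.
/// NOTE(review): `check_commit` calls `seen.insert()` again on every id it pops from `tree_ids`; for ids already
/// marked as seen here that returns `false`, so queued subtrees appear never to be visited — confirm.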
72+
fn check_tree(&mut self, oid: &ObjectId, tree_ids: &mut VecDeque<ObjectId>) {
73+
let Ok(tree) = self.db.find_tree(oid, &mut self.buf) else {
74+
(self.missing_cb)(oid, Kind::Tree);
75+
return;
76+
};
9077

91-
impl Drop for ReturnToFreeListOnDrop<'_> {
92-
fn drop(&mut self) {
93-
if !self.buf.is_empty() {
94-
self.list.borrow_mut().push(std::mem::take(&mut self.buf));
78+
for entry_ref in tree.entries.iter() {
79+
match entry_ref.mode {
80+
EntryMode::Tree => {
81+
let tree_id = entry_ref.oid.to_owned();
82+
if self.seen.insert(tree_id) {
83+
tree_ids.push_back(tree_id);
84+
}
85+
}
86+
EntryMode::Blob | EntryMode::BlobExecutable | EntryMode::Link => {
87+
let blob_id = entry_ref.oid.to_owned();
88+
if self.seen.insert(blob_id) {
89+
check_blob(&self.db, &blob_id, &mut self.missing_cb);
90+
}
91+
}
92+
EntryMode::Commit => {
93+
// Skip submodules as they are not in this repository!
94+
}
95+
}
9596
}
9697
}
9798
}
9899

99-
impl Deref for ReturnToFreeListOnDrop<'_> {
100-
type Target = Vec<u8>;
101-
102-
fn deref(&self) -> &Self::Target {
103-
&self.buf
104-
}
105-
}
106-
107-
impl DerefMut for ReturnToFreeListOnDrop<'_> {
108-
fn deref_mut(&mut self) -> &mut Self::Target {
109-
&mut self.buf
110-
}
111-
}
112-
113100
fn check_blob<F>(db: impl Exists, oid: &ObjectId, mut missing_cb: F)
114101
where
115102
F: FnMut(&ObjectId, Kind),
116103
{
117-
// Check if the blob is missing from the ODB
118104
if !db.exists(oid) {
119-
// Blob is missing, so invoke `missing_cb`
120105
missing_cb(oid, Kind::Blob);
121106
}
122107
}
123-
124-
fn check_tree<F>(
125-
oid: &ObjectId,
126-
db: &(impl FindExt + Exists),
127-
list: &FreeList,
128-
missing_cb: &mut F,
129-
oid_set: &mut HashSet,
130-
) where
131-
F: FnMut(&ObjectId, Kind),
132-
{
133-
let mut buf = list.buf();
134-
let Ok(tree) = db.find_tree(oid, &mut buf) else {
135-
missing_cb(oid, Kind::Tree);
136-
return;
137-
};
138-
139-
// Build up a set of trees and a set of blobs
140-
// For each entry in the tree
141-
for entry_ref in tree.entries.iter() {
142-
match entry_ref.mode {
143-
EntryMode::Tree => {
144-
let tree_id = entry_ref.oid.to_owned();
145-
if oid_set.insert(tree_id) {
146-
check_tree(&tree_id, &*db, list, &mut *missing_cb, oid_set);
147-
}
148-
}
149-
EntryMode::Blob | EntryMode::BlobExecutable | EntryMode::Link => {
150-
let blob_id = entry_ref.oid.to_owned();
151-
if oid_set.insert(blob_id) {
152-
check_blob(&*db, &blob_id, &mut *missing_cb);
153-
}
154-
}
155-
EntryMode::Commit => {
156-
// This implies a submodule (OID is the commit hash of the submodule)
157-
// Skip it as it's not in this repository!
158-
}
159-
}
160-
}
161-
}

0 commit comments

Comments
 (0)