Skip to content

Commit 995994a

Browse files
committed
Aggregation for index entries loaded in parallel (#293)
1 parent e3977fe commit 995994a

File tree

3 files changed

+47
-16
lines changed

3 files changed

+47
-16
lines changed

etc/check-package-size.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ echo "in root: gitoxide CLI"
2424
(enter git-config && indent cargo diet -n --package-size-limit 65KB)
2525
(enter git-hash && indent cargo diet -n --package-size-limit 10KB)
2626
(enter git-chunk && indent cargo diet -n --package-size-limit 10KB)
27-
(enter git-features && indent cargo diet -n --package-size-limit 35KB)
27+
(enter git-features && indent cargo diet -n --package-size-limit 40KB)
2828
(enter git-ref && indent cargo diet -n --package-size-limit 50KB)
2929
(enter git-diff && indent cargo diet -n --package-size-limit 10KB)
3030
(enter git-traverse && indent cargo diet -n --package-size-limit 10KB)

git-index/src/decode/mod.rs

+42-11
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ mod error {
2828
}
2929
}
3030
pub use error::Error;
31+
use git_features::parallel::InOrderIter;
3132

3233
#[derive(Default)]
3334
pub struct Options {
@@ -82,16 +83,18 @@ impl State {
8283
for (id, chunks) in entry_offsets.chunks(chunk_size).enumerate() {
8384
let chunks = chunks.to_vec();
8485
threads.push(scope.spawn(move |_| {
85-
let num_entries = chunks.iter().map(|c| c.num_entries).sum::<u32>() as usize;
86-
let mut entries = Vec::with_capacity(num_entries);
87-
let path_backing_buffer_size = entries::estimate_path_storage_requirements_in_bytes(
88-
num_entries as u32,
89-
data.len() / num_chunks,
90-
start_of_extensions.map(|ofs| ofs / num_chunks),
91-
object_hash,
92-
version,
93-
);
94-
let mut path_backing = Vec::with_capacity(path_backing_buffer_size);
86+
let num_entries_for_chunks =
87+
chunks.iter().map(|c| c.num_entries).sum::<u32>() as usize;
88+
let mut entries = Vec::with_capacity(num_entries_for_chunks);
89+
let path_backing_buffer_size_for_chunks =
90+
entries::estimate_path_storage_requirements_in_bytes(
91+
num_entries_for_chunks as u32,
92+
data.len() / num_chunks,
93+
start_of_extensions.map(|ofs| ofs / num_chunks),
94+
object_hash,
95+
version,
96+
);
97+
let mut path_backing = Vec::with_capacity(path_backing_buffer_size_for_chunks);
9598
let mut is_sparse = false;
9699
for offset in chunks {
97100
let (
@@ -119,7 +122,35 @@ impl State {
119122
))
120123
}));
121124
}
122-
todo!("combined thread results in order ")
125+
let mut results =
126+
InOrderIter::from(threads.into_iter().map(|thread| thread.join().unwrap()));
127+
let mut acc = results.next().expect("have at least two results, one per thread");
128+
// We explicitly don't adjust the reserve in acc and rather allow for more copying
129+
// to happen as vectors grow to keep the peak memory size low.
130+
// NOTE: one day, we might use a memory pool for paths. We could encode the block of memory
131+
// in some bytes in the path offset. That way there is more indirection/slower access
132+
// to the path, but it would save time here.
133+
// As it stands, `git` is definitely more efficient at this and probably uses less memory too.
134+
// Maybe benchmarks can tell if that is noticeable later at 200/400GB/s memory bandwidth, or maybe just
135+
// 100GB/s on a single core.
136+
while let (Ok(lhs), Some(res)) = (acc.as_mut(), results.next()) {
137+
match res {
138+
Ok(rhs) => {
139+
lhs.is_sparse |= rhs.is_sparse;
140+
let ofs = lhs.path_backing.len();
141+
lhs.path_backing.extend(rhs.path_backing);
142+
lhs.entries.extend(rhs.entries.into_iter().map(|mut e| {
143+
e.path.start += ofs;
144+
e.path.end += ofs;
145+
e
146+
}));
147+
}
148+
Err(err) => {
149+
acc = Err(err);
150+
}
151+
}
152+
}
153+
acc.map(|acc| (acc, &data[data.len() - object_hash.len_in_bytes()..]))
123154
}
124155
None => load_entries(
125156
post_header_data,

git-index/src/extension/decode.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ pub fn header(data: &[u8]) -> (Signature, u32, &[u8]) {
66
(signature.try_into().unwrap(), from_be_u32(size), data)
77
}
88

9-
pub fn all(beginning_of_extensions: &[u8], object_hash: git_hash::Kind) -> (Outcome, &[u8]) {
10-
extension::Iter::new_without_checksum(beginning_of_extensions, object_hash)
9+
pub fn all(maybe_beginning_of_extensions: &[u8], object_hash: git_hash::Kind) -> (Outcome, &[u8]) {
10+
extension::Iter::new_without_checksum(maybe_beginning_of_extensions, object_hash)
1111
.map(|mut ext_iter| {
1212
let mut ext = Outcome::default();
1313
for (signature, ext_data) in ext_iter.by_ref() {
@@ -20,9 +20,9 @@ pub fn all(beginning_of_extensions: &[u8], object_hash: git_hash::Kind) -> (Outc
2020
_unknown => {} // skip unknown extensions, too
2121
}
2222
}
23-
(ext, &beginning_of_extensions[ext_iter.consumed..])
23+
(ext, &maybe_beginning_of_extensions[ext_iter.consumed..])
2424
})
25-
.unwrap_or_else(|| (Outcome::default(), beginning_of_extensions))
25+
.unwrap_or_else(|| (Outcome::default(), maybe_beginning_of_extensions))
2626
}
2727

2828
#[derive(Default)]

0 commit comments

Comments
 (0)