Skip to content

Commit 0947c70

Browse files
committed
First attempt to get progress information from stat worker. (#470)
But for some reason, the counter stays at 0 despite sharing the counter stat.
1 parent 0871a96 commit 0947c70

File tree

5 files changed

+192
-22
lines changed

5 files changed

+192
-22
lines changed

Cargo.lock

+57-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ git-repository = { version = "^0.24.0", path = "git-repository", default-feature
8686
git-transport-for-configuration-only = { package = "git-transport", optional = true, version = "^0.20.0", path = "git-transport" }
8787

8888
clap = { version = "3.2.5", features = ["derive", "cargo"] }
89-
prodash = { version = "20.1.0", optional = true, default-features = false }
89+
prodash = { version = "20.1.1", optional = true, default-features = false }
9090
atty = { version = "0.2.14", optional = true, default-features = false }
9191
env_logger = { version = "0.9.0", default-features = false }
9292
crosstermion = { version = "0.10.1", optional = true, default-features = false }

git-features/Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ crc32fast = { version = "1.2.1", optional = true }
115115
sha1 = { version = "0.10.0", optional = true }
116116

117117
# progress
118-
prodash = { version = "20.1.0", optional = true, default-features = false, features = ["unit-bytes", "unit-human"] }
118+
prodash = { version = "20.1.1", optional = true, default-features = false, features = ["unit-bytes", "unit-human"] }
119119

120120
# pipe
121121
bytes = { version = "1.0.0", optional = true }

gitoxide-core/Cargo.toml

+4-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ default = []
1818
## Discover all git repositories within a directory. Particularly useful with [skim](https://github.com/lotabout/skim).
1919
organize = ["git-url", "jwalk"]
2020
## Derive the amount of time invested into a git repository akin to [git-hours](https://github.com/kimmobrunfeldt/git-hours).
21-
estimate-hours = ["itertools", "fs-err"]
21+
estimate-hours = ["itertools", "fs-err", "num_cpus", "flume"]
2222

2323
#! ### Mutually Exclusive Networking
2424
#! If both are set, _blocking-client_ will take precedence, allowing `--all-features` to be used.
@@ -59,8 +59,11 @@ blocking = { version = "1.0.2", optional = true }
5959
git-url = { version = "^0.8.0", path = "../git-url", optional = true }
6060
jwalk = { version = "0.6.0", optional = true }
6161

62+
# for 'hours'
6263
itertools = { version = "0.10.1", optional = true }
6364
fs-err = { version = "2.6.0", optional = true }
65+
num_cpus = { version = "1.13.1", optional = true }
66+
flume = { version = "0.10.14", optional = true }
6467

6568
document-features = { version = "0.2.0", optional = true }
6669

gitoxide-core/src/hours.rs

+129-17
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
use std::collections::BTreeSet;
2+
use std::convert::Infallible;
3+
use std::sync::atomic::Ordering;
24
use std::{
35
collections::{hash_map::Entry, HashMap},
46
io,
@@ -9,7 +11,7 @@ use std::{
911
use anyhow::{anyhow, bail};
1012
use git_repository as git;
1113
use git_repository::bstr::BStr;
12-
use git_repository::{actor, bstr::ByteSlice, interrupt, objs, prelude::*, progress, Progress};
14+
use git_repository::{actor, bstr::ByteSlice, interrupt, prelude::*, progress, Progress};
1315
use itertools::Itertools;
1416

1517
/// Additional configuration for the hours estimation functionality.
@@ -40,7 +42,7 @@ pub fn estimate<W, P>(
4042
Context {
4143
show_pii,
4244
ignore_bots,
43-
stats: _,
45+
stats,
4446
omit_unify_identities,
4547
mut out,
4648
}: Context<W>,
@@ -53,18 +55,25 @@ where
5355
let commit_id = repo.rev_parse_single(rev_spec)?.detach();
5456
let mut string_heap = BTreeSet::<&'static [u8]>::new();
5557

56-
let (all_commits, is_shallow) = {
57-
let mut progress = progress.add_child("Traverse commit graph");
58+
let (commit_authors, is_shallow) = {
59+
let stat_progress = stats.then(|| progress.add_child("extract stats")).map(|mut p| {
60+
p.init(None, progress::count("commits"));
61+
p
62+
});
63+
let stat_counter = stat_progress.as_ref().and_then(|p| p.counter());
64+
65+
let mut progress = progress.add_child("traverse commit graph");
66+
progress.init(None, progress::count("commits"));
67+
5868
std::thread::scope(|scope| -> anyhow::Result<(Vec<actor::SignatureRef<'static>>, bool)> {
5969
let start = Instant::now();
60-
progress.init(None, progress::count("commits"));
6170
let (tx, rx) = std::sync::mpsc::channel::<Vec<u8>>();
6271
let mailmap = repo.open_mailmap();
6372

64-
let handle = scope.spawn(move || -> anyhow::Result<Vec<actor::SignatureRef<'static>>> {
73+
let commit_thread = scope.spawn(move || -> anyhow::Result<Vec<actor::SignatureRef<'static>>> {
6574
let mut out = Vec::new();
6675
for commit_data in rx {
67-
if let Some(author) = objs::CommitRefIter::from_bytes(&commit_data)
76+
if let Some(author) = git::objs::CommitRefIter::from_bytes(&commit_data)
6877
.author()
6978
.map(|author| mailmap.resolve_cow(author.trim()))
7079
.ok()
@@ -101,12 +110,89 @@ where
101110
Ok(out)
102111
});
103112

113+
let (tx_tree_id, stat_threads) = stats
114+
.then(|| {
115+
let num_threads = num_cpus::get().saturating_sub(1 /*main thread*/).max(1);
116+
let (tx, rx) = flume::unbounded::<(u32, Option<git::hash::ObjectId>, git::hash::ObjectId)>();
117+
let stat_workers = (0..num_threads)
118+
.map(|_| {
119+
scope.spawn({
120+
let counter = stat_counter.clone();
121+
let mut repo = repo.clone();
122+
repo.object_cache_size_if_unset(4 * 1024 * 1024);
123+
let rx = rx.clone();
124+
move || -> Result<_, git::object::tree::diff::Error> {
125+
let mut out = Vec::new();
126+
for (commit_idx, parent_commit, commit) in rx {
127+
if let Some(c) = counter.as_ref() {
128+
c.fetch_add(1, Ordering::SeqCst);
129+
}
130+
let mut stat = Stats::default();
131+
let from = match parent_commit {
132+
Some(id) => {
133+
match repo.find_object(id).ok().and_then(|c| c.peel_to_tree().ok()) {
134+
Some(tree) => tree,
135+
None => continue,
136+
}
137+
}
138+
None => repo
139+
.find_object(git::hash::ObjectId::empty_tree(repo.object_hash()))
140+
.expect("always present")
141+
.into_tree(),
142+
};
143+
let to = match repo.find_object(commit).ok().and_then(|c| c.peel_to_tree().ok())
144+
{
145+
Some(c) => c,
146+
None => continue,
147+
};
148+
from.changes().for_each_to_obtain_tree(&to, |change| {
149+
use git::object::tree::diff::change::Event::*;
150+
match change.event {
151+
Addition { entry_mode, .. } => {
152+
if entry_mode.is_no_tree() {
153+
stat.added += 1
154+
}
155+
}
156+
Deletion { entry_mode, .. } => {
157+
if entry_mode.is_no_tree() {
158+
stat.removed += 1
159+
}
160+
}
161+
Modification { entry_mode, .. } => {
162+
if entry_mode.is_no_tree() {
163+
stat.modified += 1;
164+
}
165+
}
166+
}
167+
Ok::<_, Infallible>(Default::default())
168+
})?;
169+
out.push((commit_idx, stat));
170+
}
171+
Ok(out)
172+
}
173+
})
174+
})
175+
.collect::<Vec<_>>();
176+
(Some(tx), stat_workers)
177+
})
178+
.unwrap_or_else(Default::default);
179+
180+
let mut commit_idx = 0_u32;
104181
let commit_iter = interrupt::Iter::new(
105182
commit_id.ancestors(|oid, buf| {
106183
progress.inc();
107184
repo.objects.find(oid, buf).map(|o| {
108185
tx.send(o.data.to_owned()).ok();
109-
objs::CommitRefIter::from_bytes(o.data)
186+
if let Some((tx_tree, first_parent, commit)) = tx_tree_id.as_ref().and_then(|tx| {
187+
git::objs::CommitRefIter::from_bytes(o.data)
188+
.parent_ids()
189+
.next()
190+
.map(|first_parent| (tx, Some(first_parent), oid.to_owned()))
191+
}) {
192+
tx_tree.send((commit_idx, first_parent, commit)).ok();
193+
}
194+
commit_idx += 1;
195+
git::objs::CommitRefIter::from_bytes(o.data)
110196
})
111197
}),
112198
|| anyhow!("Cancelled by user"),
@@ -123,23 +209,38 @@ where
123209
};
124210
}
125211
drop(tx);
212+
drop(tx_tree_id);
126213
progress.show_throughput(start);
127-
Ok((handle.join().expect("no panic")?, is_shallow))
214+
215+
let _stats_by_commit_idx = match stat_progress {
216+
Some(mut progress) => {
217+
progress.init(Some(commit_idx as usize), progress::count("commits"));
218+
let mut stats = Vec::new();
219+
for handle in stat_threads {
220+
stats.extend(handle.join().expect("no panic")?);
221+
}
222+
progress.show_throughput(start);
223+
stats
224+
}
225+
None => Vec::new(),
226+
};
227+
228+
Ok((commit_thread.join().expect("no panic")?, is_shallow))
128229
})?
129230
};
130231

131-
if all_commits.is_empty() {
232+
if commit_authors.is_empty() {
132233
bail!("No commits to process");
133234
}
134235

135236
let start = Instant::now();
136-
let mut current_email = &all_commits[0].email;
237+
let mut current_email = &commit_authors[0].email;
137238
let mut slice_start = 0;
138239
let mut results_by_hours = Vec::new();
139240
let mut ignored_bot_commits = 0_u32;
140-
for (idx, elm) in all_commits.iter().enumerate() {
241+
for (idx, elm) in commit_authors.iter().enumerate() {
141242
if elm.email != *current_email {
142-
let estimate = estimate_hours(&all_commits[slice_start..idx]);
243+
let estimate = estimate_hours(&commit_authors[slice_start..idx]);
143244
slice_start = idx;
144245
current_email = &elm.email;
145246
if ignore_bots && estimate.name.contains_str(b"[bot]") {
@@ -149,7 +250,7 @@ where
149250
results_by_hours.push(estimate);
150251
}
151252
}
152-
if let Some(commits) = all_commits.get(slice_start..) {
253+
if let Some(commits) = commit_authors.get(slice_start..) {
153254
results_by_hours.push(estimate_hours(commits));
154255
}
155256

@@ -167,9 +268,9 @@ where
167268
let elapsed = start.elapsed();
168269
progress.done(format!(
169270
"Extracted and organized data from {} commits in {:?} ({:0.0} commits/s)",
170-
all_commits.len(),
271+
commit_authors.len(),
171272
elapsed,
172-
all_commits.len() as f32 / elapsed.as_secs_f32()
273+
commit_authors.len() as f32 / elapsed.as_secs_f32()
173274
));
174275

175276
let num_unique_authors = results_by_hours.len();
@@ -207,7 +308,7 @@ where
207308
}
208309
assert_eq!(
209310
total_commits,
210-
all_commits.len() as u32 - ignored_bot_commits,
311+
commit_authors.len() as u32 - ignored_bot_commits,
211312
"need to get all commits"
212313
);
213314
Ok(())
@@ -328,3 +429,14 @@ struct WorkByEmail {
328429
hours: f32,
329430
num_commits: u32,
330431
}
432+
433+
/// Statistics for a particular commit.
434+
#[derive(Debug, Default)]
435+
struct Stats {
436+
/// amount of added files
437+
added: usize,
438+
/// amount of removed files
439+
removed: usize,
440+
/// amount of modified files
441+
modified: usize,
442+
}

0 commit comments

Comments
 (0)