1
1
use std:: collections:: BTreeSet ;
2
+ use std:: convert:: Infallible ;
3
+ use std:: sync:: atomic:: Ordering ;
2
4
use std:: {
3
5
collections:: { hash_map:: Entry , HashMap } ,
4
6
io,
@@ -9,7 +11,7 @@ use std::{
9
11
use anyhow:: { anyhow, bail} ;
10
12
use git_repository as git;
11
13
use git_repository:: bstr:: BStr ;
12
- use git_repository:: { actor, bstr:: ByteSlice , interrupt, objs , prelude:: * , progress, Progress } ;
14
+ use git_repository:: { actor, bstr:: ByteSlice , interrupt, prelude:: * , progress, Progress } ;
13
15
use itertools:: Itertools ;
14
16
15
17
/// Additional configuration for the hours estimation functionality.
@@ -40,7 +42,7 @@ pub fn estimate<W, P>(
40
42
Context {
41
43
show_pii,
42
44
ignore_bots,
43
- stats : _ ,
45
+ stats,
44
46
omit_unify_identities,
45
47
mut out,
46
48
} : Context < W > ,
@@ -53,18 +55,25 @@ where
53
55
let commit_id = repo. rev_parse_single ( rev_spec) ?. detach ( ) ;
54
56
let mut string_heap = BTreeSet :: < & ' static [ u8 ] > :: new ( ) ;
55
57
56
- let ( all_commits, is_shallow) = {
57
- let mut progress = progress. add_child ( "Traverse commit graph" ) ;
58
+ let ( commit_authors, is_shallow) = {
59
+ let stat_progress = stats. then ( || progress. add_child ( "extract stats" ) ) . map ( |mut p| {
60
+ p. init ( None , progress:: count ( "commits" ) ) ;
61
+ p
62
+ } ) ;
63
+ let stat_counter = stat_progress. as_ref ( ) . and_then ( |p| p. counter ( ) ) ;
64
+
65
+ let mut progress = progress. add_child ( "traverse commit graph" ) ;
66
+ progress. init ( None , progress:: count ( "commits" ) ) ;
67
+
58
68
std:: thread:: scope ( |scope| -> anyhow:: Result < ( Vec < actor:: SignatureRef < ' static > > , bool ) > {
59
69
let start = Instant :: now ( ) ;
60
- progress. init ( None , progress:: count ( "commits" ) ) ;
61
70
let ( tx, rx) = std:: sync:: mpsc:: channel :: < Vec < u8 > > ( ) ;
62
71
let mailmap = repo. open_mailmap ( ) ;
63
72
64
- let handle = scope. spawn ( move || -> anyhow:: Result < Vec < actor:: SignatureRef < ' static > > > {
73
+ let commit_thread = scope. spawn ( move || -> anyhow:: Result < Vec < actor:: SignatureRef < ' static > > > {
65
74
let mut out = Vec :: new ( ) ;
66
75
for commit_data in rx {
67
- if let Some ( author) = objs:: CommitRefIter :: from_bytes ( & commit_data)
76
+ if let Some ( author) = git :: objs:: CommitRefIter :: from_bytes ( & commit_data)
68
77
. author ( )
69
78
. map ( |author| mailmap. resolve_cow ( author. trim ( ) ) )
70
79
. ok ( )
@@ -101,12 +110,89 @@ where
101
110
Ok ( out)
102
111
} ) ;
103
112
113
+ let ( tx_tree_id, stat_threads) = stats
114
+ . then ( || {
115
+ let num_threads = num_cpus:: get ( ) . saturating_sub ( 1 /*main thread*/ ) . max ( 1 ) ;
116
+ let ( tx, rx) = flume:: unbounded :: < ( u32 , Option < git:: hash:: ObjectId > , git:: hash:: ObjectId ) > ( ) ;
117
+ let stat_workers = ( 0 ..num_threads)
118
+ . map ( |_| {
119
+ scope. spawn ( {
120
+ let counter = stat_counter. clone ( ) ;
121
+ let mut repo = repo. clone ( ) ;
122
+ repo. object_cache_size_if_unset ( 4 * 1024 * 1024 ) ;
123
+ let rx = rx. clone ( ) ;
124
+ move || -> Result < _ , git:: object:: tree:: diff:: Error > {
125
+ let mut out = Vec :: new ( ) ;
126
+ for ( commit_idx, parent_commit, commit) in rx {
127
+ if let Some ( c) = counter. as_ref ( ) {
128
+ c. fetch_add ( 1 , Ordering :: SeqCst ) ;
129
+ }
130
+ let mut stat = Stats :: default ( ) ;
131
+ let from = match parent_commit {
132
+ Some ( id) => {
133
+ match repo. find_object ( id) . ok ( ) . and_then ( |c| c. peel_to_tree ( ) . ok ( ) ) {
134
+ Some ( tree) => tree,
135
+ None => continue ,
136
+ }
137
+ }
138
+ None => repo
139
+ . find_object ( git:: hash:: ObjectId :: empty_tree ( repo. object_hash ( ) ) )
140
+ . expect ( "always present" )
141
+ . into_tree ( ) ,
142
+ } ;
143
+ let to = match repo. find_object ( commit) . ok ( ) . and_then ( |c| c. peel_to_tree ( ) . ok ( ) )
144
+ {
145
+ Some ( c) => c,
146
+ None => continue ,
147
+ } ;
148
+ from. changes ( ) . for_each_to_obtain_tree ( & to, |change| {
149
+ use git:: object:: tree:: diff:: change:: Event :: * ;
150
+ match change. event {
151
+ Addition { entry_mode, .. } => {
152
+ if entry_mode. is_no_tree ( ) {
153
+ stat. added += 1
154
+ }
155
+ }
156
+ Deletion { entry_mode, .. } => {
157
+ if entry_mode. is_no_tree ( ) {
158
+ stat. removed += 1
159
+ }
160
+ }
161
+ Modification { entry_mode, .. } => {
162
+ if entry_mode. is_no_tree ( ) {
163
+ stat. modified += 1 ;
164
+ }
165
+ }
166
+ }
167
+ Ok :: < _ , Infallible > ( Default :: default ( ) )
168
+ } ) ?;
169
+ out. push ( ( commit_idx, stat) ) ;
170
+ }
171
+ Ok ( out)
172
+ }
173
+ } )
174
+ } )
175
+ . collect :: < Vec < _ > > ( ) ;
176
+ ( Some ( tx) , stat_workers)
177
+ } )
178
+ . unwrap_or_else ( Default :: default) ;
179
+
180
+ let mut commit_idx = 0_u32 ;
104
181
let commit_iter = interrupt:: Iter :: new (
105
182
commit_id. ancestors ( |oid, buf| {
106
183
progress. inc ( ) ;
107
184
repo. objects . find ( oid, buf) . map ( |o| {
108
185
tx. send ( o. data . to_owned ( ) ) . ok ( ) ;
109
- objs:: CommitRefIter :: from_bytes ( o. data )
186
+ if let Some ( ( tx_tree, first_parent, commit) ) = tx_tree_id. as_ref ( ) . and_then ( |tx| {
187
+ git:: objs:: CommitRefIter :: from_bytes ( o. data )
188
+ . parent_ids ( )
189
+ . next ( )
190
+ . map ( |first_parent| ( tx, Some ( first_parent) , oid. to_owned ( ) ) )
191
+ } ) {
192
+ tx_tree. send ( ( commit_idx, first_parent, commit) ) . ok ( ) ;
193
+ }
194
+ commit_idx += 1 ;
195
+ git:: objs:: CommitRefIter :: from_bytes ( o. data )
110
196
} )
111
197
} ) ,
112
198
|| anyhow ! ( "Cancelled by user" ) ,
@@ -123,23 +209,38 @@ where
123
209
} ;
124
210
}
125
211
drop ( tx) ;
212
+ drop ( tx_tree_id) ;
126
213
progress. show_throughput ( start) ;
127
- Ok ( ( handle. join ( ) . expect ( "no panic" ) ?, is_shallow) )
214
+
215
+ let _stats_by_commit_idx = match stat_progress {
216
+ Some ( mut progress) => {
217
+ progress. init ( Some ( commit_idx as usize ) , progress:: count ( "commits" ) ) ;
218
+ let mut stats = Vec :: new ( ) ;
219
+ for handle in stat_threads {
220
+ stats. extend ( handle. join ( ) . expect ( "no panic" ) ?) ;
221
+ }
222
+ progress. show_throughput ( start) ;
223
+ stats
224
+ }
225
+ None => Vec :: new ( ) ,
226
+ } ;
227
+
228
+ Ok ( ( commit_thread. join ( ) . expect ( "no panic" ) ?, is_shallow) )
128
229
} ) ?
129
230
} ;
130
231
131
- if all_commits . is_empty ( ) {
232
+ if commit_authors . is_empty ( ) {
132
233
bail ! ( "No commits to process" ) ;
133
234
}
134
235
135
236
let start = Instant :: now ( ) ;
136
- let mut current_email = & all_commits [ 0 ] . email ;
237
+ let mut current_email = & commit_authors [ 0 ] . email ;
137
238
let mut slice_start = 0 ;
138
239
let mut results_by_hours = Vec :: new ( ) ;
139
240
let mut ignored_bot_commits = 0_u32 ;
140
- for ( idx, elm) in all_commits . iter ( ) . enumerate ( ) {
241
+ for ( idx, elm) in commit_authors . iter ( ) . enumerate ( ) {
141
242
if elm. email != * current_email {
142
- let estimate = estimate_hours ( & all_commits [ slice_start..idx] ) ;
243
+ let estimate = estimate_hours ( & commit_authors [ slice_start..idx] ) ;
143
244
slice_start = idx;
144
245
current_email = & elm. email ;
145
246
if ignore_bots && estimate. name . contains_str ( b"[bot]" ) {
@@ -149,7 +250,7 @@ where
149
250
results_by_hours. push ( estimate) ;
150
251
}
151
252
}
152
- if let Some ( commits) = all_commits . get ( slice_start..) {
253
+ if let Some ( commits) = commit_authors . get ( slice_start..) {
153
254
results_by_hours. push ( estimate_hours ( commits) ) ;
154
255
}
155
256
@@ -167,9 +268,9 @@ where
167
268
let elapsed = start. elapsed ( ) ;
168
269
progress. done ( format ! (
169
270
"Extracted and organized data from {} commits in {:?} ({:0.0} commits/s)" ,
170
- all_commits . len( ) ,
271
+ commit_authors . len( ) ,
171
272
elapsed,
172
- all_commits . len( ) as f32 / elapsed. as_secs_f32( )
273
+ commit_authors . len( ) as f32 / elapsed. as_secs_f32( )
173
274
) ) ;
174
275
175
276
let num_unique_authors = results_by_hours. len ( ) ;
@@ -207,7 +308,7 @@ where
207
308
}
208
309
assert_eq ! (
209
310
total_commits,
210
- all_commits . len( ) as u32 - ignored_bot_commits,
311
+ commit_authors . len( ) as u32 - ignored_bot_commits,
211
312
"need to get all commits"
212
313
) ;
213
314
Ok ( ( ) )
@@ -328,3 +429,14 @@ struct WorkByEmail {
328
429
hours : f32 ,
329
430
num_commits : u32 ,
330
431
}
432
+
433
+ /// Per-commit change counts, gathered by diffing the first parent's tree against the commit's own tree.
434
+ #[ derive( Debug , Default ) ]
435
+ struct Stats {
436
+ /// Number of added entries that are not trees, i.e. added files.
437
+ added : usize ,
438
+ /// Number of deleted entries that are not trees, i.e. removed files.
439
+ removed : usize ,
440
+ /// Number of modified entries that are not trees, i.e. changed files.
441
+ modified : usize ,
442
+ }
0 commit comments