@@ -28,6 +28,7 @@ mod error {
28
28
}
29
29
}
30
30
pub use error:: Error ;
31
+ use git_features:: parallel:: InOrderIter ;
31
32
32
33
#[ derive( Default ) ]
33
34
pub struct Options {
@@ -82,16 +83,18 @@ impl State {
82
83
for ( id, chunks) in entry_offsets. chunks ( chunk_size) . enumerate ( ) {
83
84
let chunks = chunks. to_vec ( ) ;
84
85
threads. push ( scope. spawn ( move |_| {
85
- let num_entries = chunks. iter ( ) . map ( |c| c. num_entries ) . sum :: < u32 > ( ) as usize ;
86
- let mut entries = Vec :: with_capacity ( num_entries) ;
87
- let path_backing_buffer_size = entries:: estimate_path_storage_requirements_in_bytes (
88
- num_entries as u32 ,
89
- data. len ( ) / num_chunks,
90
- start_of_extensions. map ( |ofs| ofs / num_chunks) ,
91
- object_hash,
92
- version,
93
- ) ;
94
- let mut path_backing = Vec :: with_capacity ( path_backing_buffer_size) ;
86
+ let num_entries_for_chunks =
87
+ chunks. iter ( ) . map ( |c| c. num_entries ) . sum :: < u32 > ( ) as usize ;
88
+ let mut entries = Vec :: with_capacity ( num_entries_for_chunks) ;
89
+ let path_backing_buffer_size_for_chunks =
90
+ entries:: estimate_path_storage_requirements_in_bytes (
91
+ num_entries_for_chunks as u32 ,
92
+ data. len ( ) / num_chunks,
93
+ start_of_extensions. map ( |ofs| ofs / num_chunks) ,
94
+ object_hash,
95
+ version,
96
+ ) ;
97
+ let mut path_backing = Vec :: with_capacity ( path_backing_buffer_size_for_chunks) ;
95
98
let mut is_sparse = false ;
96
99
for offset in chunks {
97
100
let (
@@ -119,7 +122,35 @@ impl State {
119
122
) )
120
123
} ) ) ;
121
124
}
122
- todo ! ( "combined thread results in order " )
125
+ let mut results =
126
+ InOrderIter :: from ( threads. into_iter ( ) . map ( |thread| thread. join ( ) . unwrap ( ) ) ) ;
127
+ let mut acc = results. next ( ) . expect ( "have at least two results, one per thread" ) ;
128
+ // We explicitly don't adjust the reserve in acc and rather allow for more copying
129
+ // to happen as vectors grow to keep the peak memory size low.
130
+ // NOTE: one day, we might use a memory pool for paths. We could encode the block of memory
131
+ // in some bytes in the path offset. That way there is more indirection/slower access
132
+ // to the path, but it would save time here.
133
+ // As it stands, `git` is definitely more efficient at this and probably uses less memory too.
134
+ // Maybe benchmarks can tell if that is noticeable later at 200/400GB/s memory bandwidth, or maybe just
135
+ // 100GB/s on a single core.
136
+ while let ( Ok ( lhs) , Some ( res) ) = ( acc. as_mut ( ) , results. next ( ) ) {
137
+ match res {
138
+ Ok ( rhs) => {
139
+ lhs. is_sparse |= rhs. is_sparse ;
140
+ let ofs = lhs. path_backing . len ( ) ;
141
+ lhs. path_backing . extend ( rhs. path_backing ) ;
142
+ lhs. entries . extend ( rhs. entries . into_iter ( ) . map ( |mut e| {
143
+ e. path . start += ofs;
144
+ e. path . end += ofs;
145
+ e
146
+ } ) ) ;
147
+ }
148
+ Err ( err) => {
149
+ acc = Err ( err) ;
150
+ }
151
+ }
152
+ }
153
+ acc. map ( |acc| ( acc, & data[ data. len ( ) - object_hash. len_in_bytes ( ) ..] ) )
123
154
}
124
155
None => load_entries (
125
156
post_header_data,
0 commit comments