Skip to content

Commit d8b0919

Browse files
committed
feat: add option to traverse commits from oldest to newest
This change introduces an enum to control commit traversal order. Users can now choose between newest-first or oldest-first traversal. The default behavior remains newest-first, but it can be toggled by passing a CommitTimeOrder to a Sorting::ByCommitTime* variant. This feature is particularly useful for searching early repository history. The implementation remains largely agnostic to this change, with only minor logic adjustments in key areas as necessary. The reversed order is achieved by inverting the PriorityQueue key when an oldest-first traversal is requested.
1 parent 7bf2f4f commit d8b0919

File tree

3 files changed

+101
-43
lines changed

3 files changed

+101
-43
lines changed

gix-traverse/src/commit/simple.rs

Lines changed: 80 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,19 @@ use gix_date::SecondsSinceUnixEpoch;
22
use gix_hash::ObjectId;
33
use gix_hashtable::HashSet;
44
use smallvec::SmallVec;
5+
use std::cmp::Reverse;
56
use std::collections::VecDeque;
67

8+
#[derive(Default, Debug, Copy, Clone)]
9+
/// The order in which to prioritize commits during the search.
10+
pub enum CommitTimeOrder {
11+
#[default]
12+
/// Sort commits by newest first.
13+
NewestFirst,
14+
/// Sort commits by oldest first.
15+
OldestFirst,
16+
}
17+
718
/// Specify how to sort commits during a [simple](super::Simple) traversal.
819
///
920
/// ### Sample History
@@ -28,24 +39,27 @@ pub enum Sorting {
2839
/// as it avoids overlapping branches.
2940
#[default]
3041
BreadthFirst,
31-
/// Commits are sorted by their commit time in descending order, that is newest first.
42+
/// Commits are sorted by their commit time in the order specified, either newest or oldest first.
3243
///
3344
/// The sorting applies to all currently queued commit ids and thus is full.
3445
///
35-
/// In the *sample history* the order would be `8, 7, 6, 5, 4, 3, 2, 1`
46+
/// In the *sample history* the order would be `8, 7, 6, 5, 4, 3, 2, 1` for NewestFirst
47+
/// Or `1, 2, 3, 4, 5, 6, 7, 8` for OldestFirst
3648
///
3749
/// # Performance
3850
///
3951
/// This mode benefits greatly from having an object_cache in `find()`
4052
/// to avoid having to look up each commit twice.
41-
ByCommitTimeNewestFirst,
53+
ByCommitTime(CommitTimeOrder),
4254
/// This sorting is similar to `ByCommitTime`, but adds a cutoff to not return commits older than
4355
/// a given time, stopping the iteration once no younger commits are queued to be traversed.
4456
///
4557
/// As the query is usually repeated with different cutoff dates, this search mode benefits greatly from an object cache.
4658
///
4759
/// In the *sample history* and a cut-off date of 4, the returned list of commits would be `8, 7, 6, 4`
48-
ByCommitTimeNewestFirstCutoffOlderThan {
60+
ByCommitTimeCutoff {
61+
/// The order in which to prioritize lookups
62+
order: CommitTimeOrder,
4963
/// The amount of seconds since unix epoch, the same value obtained by any `gix_date::Time` structure and the way git counts time.
5064
seconds: gix_date::SecondsSinceUnixEpoch,
5165
},
@@ -61,11 +75,14 @@ pub enum Error {
6175
ObjectDecode(#[from] gix_object::decode::Error),
6276
}
6377

78+
use Result as Either;
79+
type QueueKey<T> = Either<T, Reverse<T>>;
80+
6481
/// The state used and potentially shared by multiple graph traversals.
6582
#[derive(Clone)]
6683
pub(super) struct State {
6784
next: VecDeque<ObjectId>,
68-
queue: gix_revwalk::PriorityQueue<SecondsSinceUnixEpoch, ObjectId>,
85+
queue: gix_revwalk::PriorityQueue<QueueKey<SecondsSinceUnixEpoch>, ObjectId>,
6986
buf: Vec<u8>,
7087
seen: HashSet<ObjectId>,
7188
parents_buf: Vec<u8>,
@@ -77,10 +94,13 @@ mod init {
7794
use gix_date::SecondsSinceUnixEpoch;
7895
use gix_hash::{oid, ObjectId};
7996
use gix_object::{CommitRefIter, FindExt};
97+
use std::cmp::Reverse;
98+
use Err as Oldest;
99+
use Ok as Newest;
80100

81101
use super::{
82102
super::{simple::Sorting, Either, Info, ParentIds, Parents, Simple},
83-
collect_parents, Error, State,
103+
collect_parents, CommitTimeOrder, Error, State,
84104
};
85105

86106
impl Default for State {
@@ -105,6 +125,14 @@ mod init {
105125
}
106126
}
107127

128+
fn order_time(i: i64, order: CommitTimeOrder) -> super::QueueKey<i64> {
129+
if let CommitTimeOrder::NewestFirst = order {
130+
Newest(i)
131+
} else {
132+
Oldest(Reverse(i))
133+
}
134+
}
135+
108136
/// Builder
109137
impl<Find, Predicate> Simple<Find, Predicate>
110138
where
@@ -117,19 +145,23 @@ mod init {
117145
Sorting::BreadthFirst => {
118146
self.queue_to_vecdeque();
119147
}
120-
Sorting::ByCommitTimeNewestFirst | Sorting::ByCommitTimeNewestFirstCutoffOlderThan { .. } => {
148+
Sorting::ByCommitTime(order) | Sorting::ByCommitTimeCutoff { order, .. } => {
121149
let cutoff_time = self.sorting.cutoff_time();
122150
let state = &mut self.state;
123151
for commit_id in state.next.drain(..) {
124152
let commit_iter = self.objects.find_commit_iter(&commit_id, &mut state.buf)?;
125153
let time = commit_iter.committer()?.time.seconds;
126-
match cutoff_time {
127-
Some(cutoff_time) if time >= cutoff_time => {
128-
state.queue.insert(time, commit_id);
154+
let ordered_time = order_time(time, order);
155+
match (cutoff_time, order) {
156+
(Some(cutoff_time), CommitTimeOrder::NewestFirst) if time >= cutoff_time => {
157+
state.queue.insert(ordered_time, commit_id);
158+
}
159+
(Some(cutoff_time), CommitTimeOrder::OldestFirst) if time <= cutoff_time => {
160+
state.queue.insert(ordered_time, commit_id);
129161
}
130-
Some(_) => {}
131-
None => {
132-
state.queue.insert(time, commit_id);
162+
(Some(_), _) => {}
163+
(None, _) => {
164+
state.queue.insert(ordered_time, commit_id);
133165
}
134166
}
135167
}
@@ -254,10 +286,8 @@ mod init {
254286
} else {
255287
match self.sorting {
256288
Sorting::BreadthFirst => self.next_by_topology(),
257-
Sorting::ByCommitTimeNewestFirst => self.next_by_commit_date(None),
258-
Sorting::ByCommitTimeNewestFirstCutoffOlderThan { seconds } => {
259-
self.next_by_commit_date(seconds.into())
260-
}
289+
Sorting::ByCommitTime(order) => self.next_by_commit_date(order, None),
290+
Sorting::ByCommitTimeCutoff { seconds, order } => self.next_by_commit_date(order, seconds.into()),
261291
}
262292
}
263293
}
@@ -267,7 +297,7 @@ mod init {
267297
/// If not topo sort, provide the cutoff date if present.
268298
fn cutoff_time(&self) -> Option<SecondsSinceUnixEpoch> {
269299
match self {
270-
Sorting::ByCommitTimeNewestFirstCutoffOlderThan { seconds } => Some(*seconds),
300+
Sorting::ByCommitTimeCutoff { seconds, .. } => Some(*seconds),
271301
_ => None,
272302
}
273303
}
@@ -281,18 +311,21 @@ mod init {
281311
{
282312
fn next_by_commit_date(
283313
&mut self,
284-
cutoff_older_than: Option<SecondsSinceUnixEpoch>,
314+
order: CommitTimeOrder,
315+
cutoff: Option<SecondsSinceUnixEpoch>,
285316
) -> Option<Result<Info, Error>> {
286317
let state = &mut self.state;
287318

288-
let (commit_time, oid) = state.queue.pop()?;
319+
let (commit_time, oid) = match state.queue.pop()? {
320+
(Newest(t) | Oldest(Reverse(t)), o) => (t, o),
321+
};
289322
let mut parents: ParentIds = Default::default();
290323
match super::super::find(self.cache.as_ref(), &self.objects, &oid, &mut state.buf) {
291324
Ok(Either::CachedCommit(commit)) => {
292325
if !collect_parents(&mut state.parent_ids, self.cache.as_ref(), commit.iter_parents()) {
293326
// drop corrupt caches and try again with ODB
294327
self.cache = None;
295-
return self.next_by_commit_date(cutoff_older_than);
328+
return self.next_by_commit_date(order, cutoff);
296329
}
297330
for (id, parent_commit_time) in state.parent_ids.drain(..) {
298331
parents.push(id);
@@ -301,9 +334,19 @@ mod init {
301334
continue;
302335
}
303336

304-
match cutoff_older_than {
305-
Some(cutoff_older_than) if parent_commit_time < cutoff_older_than => continue,
306-
Some(_) | None => state.queue.insert(parent_commit_time, id),
337+
let time = order_time(parent_commit_time, order);
338+
match (cutoff, order) {
339+
(Some(cutoff_older_than), CommitTimeOrder::NewestFirst)
340+
if parent_commit_time < cutoff_older_than =>
341+
{
342+
continue
343+
}
344+
(Some(cutoff_newer_than), CommitTimeOrder::OldestFirst)
345+
if parent_commit_time > cutoff_newer_than =>
346+
{
347+
continue
348+
}
349+
(Some(_) | None, _) => state.queue.insert(time, id),
307350
}
308351
}
309352
}
@@ -323,9 +366,19 @@ mod init {
323366
.and_then(|parent| parent.committer().ok().map(|committer| committer.time.seconds))
324367
.unwrap_or_default();
325368

326-
match cutoff_older_than {
327-
Some(cutoff_older_than) if parent_commit_time < cutoff_older_than => continue,
328-
Some(_) | None => state.queue.insert(parent_commit_time, id),
369+
let time = order_time(parent_commit_time, order);
370+
match (cutoff, order) {
371+
(Some(cutoff_older_than), CommitTimeOrder::NewestFirst)
372+
if parent_commit_time < cutoff_older_than =>
373+
{
374+
continue
375+
}
376+
(Some(cutoff_newer_than), CommitTimeOrder::OldestFirst)
377+
if parent_commit_time > cutoff_newer_than =>
378+
{
379+
continue
380+
}
381+
(Some(_) | None, _) => state.queue.insert(time, id),
329382
}
330383
}
331384
Ok(_unused_token) => break,

gix/src/revision/spec/parse/delegate/navigate.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ impl<'repo> delegate::Navigate for Delegate<'repo> {
192192
match oid
193193
.attach(repo)
194194
.ancestors()
195-
.sorting(crate::revision::walk::Sorting::ByCommitTimeNewestFirst)
195+
.sorting(crate::revision::walk::Sorting::ByCommitTime(Default::default()))
196196
.all()
197197
{
198198
Ok(iter) => {
@@ -245,7 +245,7 @@ impl<'repo> delegate::Navigate for Delegate<'repo> {
245245
.filter(|r| r.id().header().ok().map_or(false, |obj| obj.kind().is_commit()))
246246
.filter_map(|r| r.detach().peeled),
247247
)
248-
.sorting(crate::revision::walk::Sorting::ByCommitTimeNewestFirst)
248+
.sorting(crate::revision::walk::Sorting::ByCommitTime(Default::default()))
249249
.all()
250250
{
251251
Ok(iter) => {

gix/src/revision/walk.rs

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use gix_hash::ObjectId;
22
use gix_object::FindExt;
3+
use gix_traverse::commit::simple::CommitTimeOrder;
34

45
use crate::{ext::ObjectIdExt, revision, Repository};
56

@@ -39,25 +40,28 @@ pub enum Sorting {
3940
/// as it avoids overlapping branches.
4041
#[default]
4142
BreadthFirst,
42-
/// Commits are sorted by their commit time in descending order, that is newest first.
43+
/// Commits are sorted by their commit time in the order specified, either newest or oldest first.
4344
///
4445
/// The sorting applies to all currently queued commit ids and thus is full.
4546
///
46-
/// In the *sample history* the order would be `8, 7, 6, 4, 5, 2, 3, 1`
47+
/// In the *sample history* the order would be `8, 7, 6, 5, 4, 3, 2, 1` for NewestFirst
48+
/// Or `1, 2, 3, 4, 5, 6, 7, 8` for OldestFirst
4749
///
4850
/// # Performance
4951
///
50-
/// This mode benefits greatly from having an [object cache](crate::Repository::object_cache_size) configured
51-
/// to avoid having to look up each commit twice.
52-
ByCommitTimeNewestFirst,
52+
/// This mode benefits greatly from having an [object cache](crate::Repository::object_cache_size) configured
53+
/// to avoid having to look up each commit twice.
54+
ByCommitTime(CommitTimeOrder),
5355
/// This sorting is similar to `ByCommitTime`, but adds a cutoff to not return commits older than
5456
/// a given time, stopping the iteration once no younger commits are queued to be traversed.
5557
///
5658
/// As the query is usually repeated with different cutoff dates, this search mode benefits greatly from an object cache.
5759
///
5860
/// In the *sample history* and a cut-off date of 4, the returned list of commits would be `8, 7, 6, 4`
59-
ByCommitTimeNewestFirstCutoffOlderThan {
60-
/// The amount of seconds since unix epoch to use as cut-off time.
61+
ByCommitTimeCutoff {
62+
/// The order in which to prioritize lookups
63+
order: CommitTimeOrder,
64+
/// The amount of seconds since unix epoch, the same value obtained by any `gix_date::Time` structure and the way git counts time.
6165
seconds: gix_date::SecondsSinceUnixEpoch,
6266
},
6367
}
@@ -66,9 +70,9 @@ impl Sorting {
6670
fn into_simple(self) -> Option<gix_traverse::commit::simple::Sorting> {
6771
Some(match self {
6872
Sorting::BreadthFirst => gix_traverse::commit::simple::Sorting::BreadthFirst,
69-
Sorting::ByCommitTimeNewestFirst => gix_traverse::commit::simple::Sorting::ByCommitTimeNewestFirst,
70-
Sorting::ByCommitTimeNewestFirstCutoffOlderThan { seconds } => {
71-
gix_traverse::commit::simple::Sorting::ByCommitTimeNewestFirstCutoffOlderThan { seconds }
73+
Sorting::ByCommitTime(order) => gix_traverse::commit::simple::Sorting::ByCommitTime(order),
74+
Sorting::ByCommitTimeCutoff { seconds, order } => {
75+
gix_traverse::commit::simple::Sorting::ByCommitTimeCutoff { order, seconds }
7276
}
7377
})
7478
}
@@ -214,9 +218,10 @@ impl<'repo> Platform<'repo> {
214218
/// Also note that commits that can't be accessed or are missing are simply ignored for the purpose of obtaining the cutoff date.
215219
#[doc(alias = "hide", alias = "git2")]
216220
pub fn with_pruned(mut self, ids: impl IntoIterator<Item = impl Into<ObjectId>>) -> Self {
217-
let mut cutoff = match self.sorting {
218-
Sorting::ByCommitTimeNewestFirstCutoffOlderThan { seconds } => Some(seconds),
219-
Sorting::BreadthFirst | Sorting::ByCommitTimeNewestFirst => None,
221+
let (mut cutoff, order) = match self.sorting {
222+
Sorting::ByCommitTimeCutoff { seconds, order } => (Some(seconds), order),
223+
Sorting::ByCommitTime(order) => (None, order),
224+
Sorting::BreadthFirst => (None, CommitTimeOrder::default()),
220225
};
221226
for id in ids.into_iter() {
222227
let id = id.into();
@@ -231,7 +236,7 @@ impl<'repo> Platform<'repo> {
231236
}
232237

233238
if let Some(cutoff) = cutoff {
234-
self.sorting = Sorting::ByCommitTimeNewestFirstCutoffOlderThan { seconds: cutoff }
239+
self.sorting = Sorting::ByCommitTimeCutoff { seconds: cutoff, order }
235240
}
236241
self
237242
}

0 commit comments

Comments
 (0)