Skip to content

Commit ea0babe

Browse files
committed
feat!: tree::depthfirst() traversal
A depth-first traversal yields the `.git/index` order. It's a breaking change as the `Visit` trait gets another way to pop a tracked path, suitable for the stack used by the depth-first traversal.
1 parent 8141765 commit ea0babe

File tree

12 files changed

+448
-157
lines changed

12 files changed

+448
-157
lines changed

Cargo.lock

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

gix-traverse/src/tree/breadthfirst.rs

+7-4
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@ use std::collections::VecDeque;
22

33
use gix_hash::ObjectId;
44

5-
/// The error is part of the item returned by the [`traverse()`][impl_::traverse()] function.
5+
/// The error is part of the item returned by the [`breadthfirst()`](crate::tree::breadthfirst()) and
6+
/// [`depthfirst()`](crate::tree::depthfirst()) functions.
67
#[derive(Debug, thiserror::Error)]
78
#[allow(missing_docs)]
89
pub enum Error {
@@ -28,7 +29,7 @@ impl State {
2829
}
2930
}
3031

31-
pub(crate) mod impl_ {
32+
pub(super) mod function {
3233
use std::borrow::BorrowMut;
3334

3435
use gix_object::{FindExt, TreeRefIter};
@@ -38,6 +39,8 @@ pub(crate) mod impl_ {
3839

3940
/// Start a breadth-first iteration over the `root` trees entries.
4041
///
42+
/// Note that non-trees will be listed first, so the natural order of entries within a tree is lost.
43+
///
4144
/// * `root`
4245
/// * the tree to iterate in a nested fashion.
4346
/// * `state` - all state used for the iteration. If multiple iterations are performed, allocations can be minimized by reusing
@@ -46,9 +49,9 @@ pub(crate) mod impl_ {
4649
/// an iterator over entries if the object is present and is a tree. Caching should be implemented within this function
4750
/// as needed. The return value is `Option<TreeIter>` which degenerates all error information. Not finding a commit should also
4851
/// be considered an error as all objects in the tree DAG should be present in the database. Hence [`Error::Find`] should
49-
/// be escalated into a more specific error if its encountered by the caller.
52+
/// be escalated into a more specific error if it's encountered by the caller.
5053
/// * `delegate` - A way to observe entries and control the iteration while allowing the optimizer to let you pay only for what you use.
51-
pub fn traverse<StateMut, Find, V>(
54+
pub fn breadthfirst<StateMut, Find, V>(
5255
root: TreeRefIter<'_>,
5356
mut state: StateMut,
5457
objects: Find,

gix-traverse/src/tree/depthfirst.rs

+111
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
pub use super::breadthfirst::Error;
2+
3+
/// Memory that a tree traversal needs, kept around so that multiple traversals can share and re-use it.
#[derive(Default, Clone)]
pub struct State {
    /// Buffers that are currently unused and can be handed out again.
    freelist: Vec<Vec<u8>>,
}

impl State {
    /// Take a buffer off the free-list and return it empty.
    ///
    /// A fresh buffer is created if the free-list has none left.
    pub fn pop_buf(&mut self) -> Vec<u8> {
        let mut recycled = self.freelist.pop().unwrap_or_default();
        // Only the content is dropped here, the allocation of a recycled buffer stays available to the caller.
        recycled.clear();
        recycled
    }

    /// Hand `buf` back so that a later call to [`Self::pop_buf()`] can return it.
    pub fn push_buf(&mut self, buf: Vec<u8>) {
        self.freelist.push(buf);
    }
}
26+
27+
pub(super) mod function {
28+
use super::{Error, State};
29+
use crate::tree::visit::Action;
30+
use crate::tree::Visit;
31+
use gix_hash::ObjectId;
32+
use gix_object::{FindExt, TreeRefIter};
33+
use std::borrow::BorrowMut;
34+
35+
/// A depth-first traversal of the `root` tree, that preserves the natural order of a tree while immediately descending
36+
/// into sub-trees.
37+
///
38+
/// `state` can be passed to re-use memory during multiple invocations.
39+
pub fn depthfirst<StateMut, Find, V>(
40+
root: ObjectId,
41+
mut state: StateMut,
42+
objects: Find,
43+
delegate: &mut V,
44+
) -> Result<(), Error>
45+
where
46+
Find: gix_object::Find,
47+
StateMut: BorrowMut<State>,
48+
V: Visit,
49+
{
50+
enum Machine {
51+
GetTree(ObjectId),
52+
Iterate {
53+
tree_buf: Vec<u8>,
54+
byte_offset_to_next_entry: usize,
55+
},
56+
}
57+
58+
let state = state.borrow_mut();
59+
let mut stack = vec![Machine::GetTree(root)];
60+
'outer: while let Some(item) = stack.pop() {
61+
match item {
62+
Machine::GetTree(id) => {
63+
let mut buf = state.pop_buf();
64+
objects.find_tree_iter(&id, &mut buf)?;
65+
stack.push(Machine::Iterate {
66+
tree_buf: buf,
67+
byte_offset_to_next_entry: 0,
68+
});
69+
}
70+
Machine::Iterate {
71+
tree_buf: buf,
72+
byte_offset_to_next_entry,
73+
} => {
74+
let mut iter = TreeRefIter::from_bytes(&buf[byte_offset_to_next_entry..]);
75+
delegate.pop_back_tracked_path_and_set_current();
76+
while let Some(entry) = iter.next() {
77+
let entry = entry?;
78+
if entry.mode.is_tree() {
79+
delegate.push_path_component(entry.filename);
80+
let res = delegate.visit_tree(&entry);
81+
delegate.pop_path_component();
82+
match res {
83+
Action::Continue => {
84+
delegate.push_back_tracked_path_component(entry.filename);
85+
}
86+
Action::Cancel => break 'outer,
87+
Action::Skip => continue,
88+
}
89+
90+
let next_step = Machine::GetTree(entry.oid.to_owned());
91+
stack.push(Machine::Iterate {
92+
byte_offset_to_next_entry: iter.offset_to_next_entry(&buf),
93+
tree_buf: buf,
94+
});
95+
stack.push(next_step);
96+
continue 'outer;
97+
} else {
98+
delegate.push_path_component(entry.filename);
99+
if let Action::Cancel = delegate.visit_nontree(&entry) {
100+
break 'outer;
101+
}
102+
delegate.pop_path_component();
103+
}
104+
}
105+
state.push_buf(buf);
106+
}
107+
}
108+
}
109+
Ok(())
110+
}
111+
}

gix-traverse/src/tree/mod.rs

+14-2
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,15 @@ use gix_object::bstr::{BStr, BString};
55
/// A trait to allow responding to a traversal designed to observe all entries in a tree, recursively while keeping track of
66
/// paths if desired.
77
pub trait Visit {
8-
/// Sets the full path path in front of the queue so future calls to push and pop components affect it instead.
8+
/// Sets the full path in the back of the queue so future calls to push and pop components affect it instead.
9+
///
10+
/// Note that the first call is made without an accompanying call to [`Self::push_back_tracked_path_component()`].
11+
///
12+
/// This is used by the depth-first traversal of trees.
13+
fn pop_back_tracked_path_and_set_current(&mut self);
14+
/// Sets the full path in front of the queue so future calls to push and pop components affect it instead.
15+
///
16+
/// This is used by the breadth-first traversal of trees.
917
fn pop_front_tracked_path_and_set_current(&mut self);
1018
/// Append a `component` to the end of a path, which may be empty.
1119
fn push_back_tracked_path_component(&mut self, component: &BStr);
@@ -66,4 +74,8 @@ pub mod recorder;
6674

6775
///
6876
pub mod breadthfirst;
69-
pub use breadthfirst::impl_::traverse as breadthfirst;
77+
pub use breadthfirst::function::breadthfirst;
78+
79+
///
80+
pub mod depthfirst;
81+
pub use depthfirst::function::depthfirst;

gix-traverse/src/tree/recorder.rs

+6
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,12 @@ impl Recorder {
9292
}
9393

9494
impl Visit for Recorder {
95+
fn pop_back_tracked_path_and_set_current(&mut self) {
96+
if let Some(Location::Path) = self.location {
97+
self.path = self.path_deque.pop_back().unwrap_or_default();
98+
}
99+
}
100+
95101
fn pop_front_tracked_path_and_set_current(&mut self) {
96102
if let Some(Location::Path) = self.location {
97103
self.path = self

gix-traverse/tests/Cargo.toml

+3-2
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,11 @@ edition = "2021"
1111
rust-version = "1.65"
1212

1313
[[test]]
14-
name = "test"
15-
path = "traverse.rs"
14+
name = "traverse"
15+
path = "traverse/main.rs"
1616

1717
[dev-dependencies]
18+
insta = "1.40.0"
1819
gix-traverse = { path = ".." }
1920
gix-testtools = { path = "../../tests/tools" }
2021
gix-odb = { path = "../../gix-odb" }
File renamed without changes.

0 commit comments

Comments
 (0)