diff --git a/Cargo.lock b/Cargo.lock index d193c9be43a..2da56a0c4ff 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -660,6 +660,14 @@ dependencies = [ [[package]] name = "git-commitgraph" version = "0.0.0" +dependencies = [ + "bstr", + "byteorder", + "filebuffer", + "git-object", + "quick-error 2.0.0", + "tempfile", +] [[package]] name = "git-features" diff --git a/Makefile b/Makefile index 3abcbbaff46..0784f43d822 100644 --- a/Makefile +++ b/Makefile @@ -94,6 +94,8 @@ check: ## Build all code in suitable configurations && cargo check --features fast-sha1 \ && cargo check --features interrupt-handler \ && cargo check --features disable-interrupts + cd git-commitgraph && cargo check --all-features \ + && cargo check unit-tests: ## run all unit tests cargo test --all --no-fail-fast diff --git a/README.md b/README.md index 8f25b7605bd..1381e6013d6 100644 --- a/README.md +++ b/README.md @@ -197,7 +197,9 @@ become available. * [ ] API documentation with examples ### git-commitgraph - * Access to all capabilities provided by the file format, as well as their maintenance + * [x] read-only access + * [x] Graph lookup of commit information to obtain timestamps, generation and parents + * [ ] create and update graphs and graph files * [ ] API documentation with examples ### git-config diff --git a/git-commitgraph/Cargo.toml b/git-commitgraph/Cargo.toml index a4879e6f301..390529b2f40 100644 --- a/git-commitgraph/Cargo.toml +++ b/git-commitgraph/Cargo.toml @@ -1,11 +1,11 @@ [package] name = "git-commitgraph" version = "0.0.0" -repository = "https://github.com/Byron/git-oxide" +repository = "https://github.com/Byron/gitoxide" documentation = "https://git-scm.com/docs/commit-graph#:~:text=The%20commit-graph%20file%20is%20a%20supplemental%20data%20structure,or%20in%20the%20info%20directory%20of%20an%20alternate." 
license = "MIT/Apache-2.0" -description = "A WIP crate of the gitoxide project dedicated implementing the git commitgraph file format and its maintenance" -authors = ["Sebastian Thiel "] +description = "A crate of the gitoxide project dedicated implementing the git commitgraph file format and its maintenance" +authors = ["Conor Davis ", "Sebastian Thiel "] edition = "2018" [lib] @@ -14,3 +14,12 @@ doctest = false # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +git-object = { version = "^0.4.0", path = "../git-object" } + +bstr = { version = "0.2.13", default-features = false, features = ["std"] } +byteorder = "1.2.3" +filebuffer = "0.4.0" +quick-error = "2.0.0" + +[dev-dependencies] +tempfile = "3.1.0" diff --git a/git-commitgraph/src/file/access.rs b/git-commitgraph/src/file/access.rs new file mode 100644 index 00000000000..5626784e7a0 --- /dev/null +++ b/git-commitgraph/src/file/access.rs @@ -0,0 +1,126 @@ +use crate::file::{self, commit::Commit, File, COMMIT_DATA_ENTRY_SIZE}; +use git_object::{borrowed, HashKind, SHA1_SIZE}; +use std::{ + convert::{TryFrom, TryInto}, + fmt::{Debug, Formatter}, + path::Path, +}; + +/// Access +impl File { + /// Returns the commit data for the commit located at the given lexicographical position. + /// + /// `pos` must range from 0 (inclusive) to `self.num_commits()` (exclusive). + /// + /// # Panics + /// + /// Panics if `pos` is out of bounds. + pub fn commit_at(&self, pos: file::Position) -> Commit<'_> { + Commit::new(self, pos) + } + + pub fn hash_kind(&self) -> HashKind { + HashKind::Sha1 + } + + /// Returns 20 bytes sha1 at the given index in our list of (sorted) sha1 hashes. 
+ /// The position ranges from 0 to self.num_commits() + // copied from git-odb/src/pack/index/access.rs + pub fn id_at(&self, pos: file::Position) -> borrowed::Id<'_> { + assert!( + pos.0 < self.num_commits(), + "expected lexigraphical position less than {}, got {}", + self.num_commits(), + pos.0 + ); + let pos: usize = pos + .0 + .try_into() + .expect("an architecture able to hold 32 bits of integer"); + let start = self.oid_lookup_offset + (pos * SHA1_SIZE); + borrowed::Id::try_from(&self.data[start..start + SHA1_SIZE]).expect("20 bytes SHA1 to be alright") + } + + pub fn iter_base_graph_ids(&self) -> impl Iterator> { + let base_graphs_list = match self.base_graphs_list_offset { + Some(v) => &self.data[v..v + (SHA1_SIZE * self.base_graph_count as usize)], + None => &[], + }; + base_graphs_list + .chunks_exact(SHA1_SIZE) + .map(|bytes| borrowed::Id::try_from(bytes).expect("20 bytes SHA1 to be alright")) + } + + pub fn iter_commits(&self) -> impl Iterator> { + (0..self.num_commits()).map(move |i| self.commit_at(file::Position(i))) + } + + pub fn iter_ids(&self) -> impl Iterator> { + (0..self.num_commits()).map(move |i| self.id_at(file::Position(i))) + } + + // copied from git-odb/src/pack/index/access.rs + pub fn lookup(&self, id: borrowed::Id<'_>) -> Option { + let first_byte = id.first_byte() as usize; + let mut upper_bound = self.fan[first_byte]; + let mut lower_bound = if first_byte != 0 { self.fan[first_byte - 1] } else { 0 }; + + // Bisect using indices + // TODO: Performance of V2 could possibly be better if we would be able to do a binary search + // on 20 byte chunks directly, but doing so requires transmuting and that is unsafe, even though + // it should not be if the bytes match up and the type has no destructor. 
+ while lower_bound < upper_bound { + let mid = (lower_bound + upper_bound) / 2; + let mid_sha = self.id_at(file::Position(mid)); + + use std::cmp::Ordering::*; + match id.cmp(&mid_sha) { + Less => upper_bound = mid, + Equal => return Some(file::Position(mid)), + Greater => lower_bound = mid + 1, + } + } + None + } + + /// Returns the number of commits in this graph file. + /// + /// The maximum valid `file::Position` that can be used with this file is one less than + /// `num_commits()`. + pub fn num_commits(&self) -> u32 { + self.fan[255] + } + + pub fn path(&self) -> &Path { + &self.path + } +} + +impl File { + /// Returns the byte slice for the given commit in this file's Commit Data (CDAT) chunk. + pub(crate) fn commit_data_bytes(&self, pos: file::Position) -> &[u8] { + assert!( + pos.0 < self.num_commits(), + "expected lexigraphical position less than {}, got {}", + self.num_commits(), + pos.0 + ); + let pos: usize = pos + .0 + .try_into() + .expect("an architecture able to hold 32 bits of integer"); + let start = self.commit_data_offset + (pos * COMMIT_DATA_ENTRY_SIZE); + &self.data[start..start + COMMIT_DATA_ENTRY_SIZE] + } + + /// Returns the byte slice for this file's entire Extra Edge List (EDGE) chunk. + pub(crate) fn extra_edges_data(&self) -> Option<&[u8]> { + Some(&self.data[self.extra_edges_list_range.clone()?]) + } +} + +impl Debug for File { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, r#"File("{:?}")"#, self.path.display()) + } +} diff --git a/git-commitgraph/src/file/commit.rs b/git-commitgraph/src/file/commit.rs new file mode 100644 index 00000000000..05e9d71e7b6 --- /dev/null +++ b/git-commitgraph/src/file/commit.rs @@ -0,0 +1,258 @@ +use crate::{ + file::{self, File}, + graph, +}; +use byteorder::{BigEndian, ByteOrder}; +use git_object::{borrowed, owned, SHA1_SIZE}; +use quick_error::quick_error; +use std::{ + convert::{TryFrom, TryInto}, + fmt::{Debug, Formatter}, + slice::Chunks, +}; + +quick_error! 
{ + #[derive(Debug)] + pub enum Error { + ExtraEdgesListOverflow(commit: owned::Id) { + display( + "commit {}'s extra edges overflows the commit-graph file's extra edges list", + commit, + ) + } + FirstParentIsExtraEdgeIndex(commit: owned::Id) { + display( + "commit {}'s first parent is an extra edge index, which is invalid", + commit, + ) + } + MissingExtraEdgesList(commit: owned::Id) { + display( + "commit {} has extra edges, but commit-graph file has no extra edges list", + commit, + ) + } + SecondParentWithoutFirstParent(commit: owned::Id) { + display("commit {} has a second parent but not a first parent", commit) + } + } +} + +// Note that git's commit-graph-format.txt as of v2.28.0 gives an incorrect value 0x0700_0000 for +// NO_PARENT. +const NO_PARENT: u32 = 0x7000_0000; +const EXTENDED_EDGES_MASK: u32 = 0x8000_0000; + +pub struct Commit<'a> { + file: &'a File, + pos: file::Position, + // We can parse the below fields lazily if needed. + commit_timestamp: u64, + generation: u32, + parent1: ParentEdge, + parent2: ParentEdge, + root_tree_id: borrowed::Id<'a>, +} + +impl<'a> Commit<'a> { + pub(crate) fn new(file: &'a File, pos: file::Position) -> Self { + let bytes = file.commit_data_bytes(pos); + Commit { + file, + pos, + root_tree_id: borrowed::Id::try_from(&bytes[..SHA1_SIZE]).expect("20 bytes SHA1 to be alright"), + parent1: ParentEdge::from_raw(BigEndian::read_u32(&bytes[SHA1_SIZE..SHA1_SIZE + 4])), + parent2: ParentEdge::from_raw(BigEndian::read_u32(&bytes[SHA1_SIZE + 4..SHA1_SIZE + 8])), + generation: BigEndian::read_u32(&bytes[SHA1_SIZE + 8..SHA1_SIZE + 12]) >> 2, + commit_timestamp: BigEndian::read_u64(&bytes[SHA1_SIZE + 8..SHA1_SIZE + 16]) & 0x0003_ffff_ffff, + } + } + + /// Returns the committer timestamp of this commit. + /// + /// The value is the number of seconds since 1970-01-01 00:00:00 UTC. + pub fn committer_timestamp(&self) -> u64 { + self.commit_timestamp + } + + /// Returns the generation number of this commit. 
+ /// + /// Commits without parents have generation number 1. Commits with parents have a generation + /// number that is the max of their parents' generation numbers + 1. + pub fn generation(&self) -> u32 { + self.generation + } + + pub fn iter_parents(&'a self) -> impl Iterator> + 'a { + // I didn't find a combinator approach that a) was as strict as ParentIterator, b) supported + // fuse-after-first-error behavior, and b) was significantly shorter or more understandable + // than ParentIterator. So here we are. + ParentIterator { + commit_data: self, + state: ParentIteratorState::First, + } + } + + pub fn id(&self) -> borrowed::Id<'_> { + self.file.id_at(self.pos) + } + + pub fn parent1(&self) -> Result, Error> { + self.iter_parents().next().transpose() + } + + pub fn root_tree_id(&self) -> borrowed::Id<'_> { + self.root_tree_id + } +} + +impl<'a> Debug for Commit<'a> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!( + f, + "Commit {{ id: {}, lex_pos: {}, generation: {}, root_tree_id: {}, parent1: {:?}, parent2: {:?} }}", + self.id(), + self.pos, + self.generation(), + self.root_tree_id(), + self.parent1, + self.parent2, + ) + } +} + +impl<'a> Eq for Commit<'a> {} + +impl<'a> PartialEq for Commit<'a> { + fn eq(&self, other: &Self) -> bool { + self.file as *const File == other.file as *const File && self.pos == other.pos + } +} + +pub struct ParentIterator<'a> { + commit_data: &'a Commit<'a>, + state: ParentIteratorState<'a>, +} + +impl<'a> Iterator for ParentIterator<'a> { + type Item = Result; + + fn next(&mut self) -> Option { + let state = std::mem::replace(&mut self.state, ParentIteratorState::Exhausted); + match state { + ParentIteratorState::First => match self.commit_data.parent1 { + ParentEdge::None => match self.commit_data.parent2 { + ParentEdge::None => None, + _ => Some(Err(Error::SecondParentWithoutFirstParent(self.commit_data.id().into()))), + }, + ParentEdge::GraphPosition(pos) => { + self.state = 
ParentIteratorState::Second; + Some(Ok(pos)) + } + ParentEdge::ExtraEdgeIndex(_) => { + Some(Err(Error::FirstParentIsExtraEdgeIndex(self.commit_data.id().into()))) + } + }, + ParentIteratorState::Second => match self.commit_data.parent2 { + ParentEdge::None => None, + ParentEdge::GraphPosition(pos) => Some(Ok(pos)), + ParentEdge::ExtraEdgeIndex(extra_edge_index) => { + if let Some(extra_edges_list) = self.commit_data.file.extra_edges_data() { + let start_offset: usize = extra_edge_index + .try_into() + .expect("an architecture able to hold 32 bits of integer"); + let start_offset = start_offset + .checked_mul(4) + .expect("an extended edge index small enough to fit in usize"); + if let Some(tail) = extra_edges_list.get(start_offset..) { + self.state = ParentIteratorState::Extra(tail.chunks(4)); + // This recursive call is what blocks me from replacing ParentIterator + // with a std::iter::from_fn closure. + self.next() + } else { + Some(Err(Error::ExtraEdgesListOverflow(self.commit_data.id().into()))) + } + } else { + Some(Err(Error::MissingExtraEdgesList(self.commit_data.id().into()))) + } + } + }, + ParentIteratorState::Extra(mut chunks) => { + if let Some(chunk) = chunks.next() { + let extra_edge = BigEndian::read_u32(chunk); + match ExtraEdge::from_raw(extra_edge) { + ExtraEdge::Internal(pos) => { + self.state = ParentIteratorState::Extra(chunks); + Some(Ok(pos)) + } + ExtraEdge::Last(pos) => Some(Ok(pos)), + } + } else { + Some(Err(Error::ExtraEdgesListOverflow(self.commit_data.id().into()))) + } + } + ParentIteratorState::Exhausted => None, + } + } + + fn size_hint(&self) -> (usize, Option) { + match (&self.state, self.commit_data.parent1, self.commit_data.parent2) { + (ParentIteratorState::First, ParentEdge::None, ParentEdge::None) => (0, Some(0)), + (ParentIteratorState::First, ParentEdge::None, _) => (1, Some(1)), + (ParentIteratorState::First, ParentEdge::GraphPosition(_), ParentEdge::None) => (1, Some(1)), + (ParentIteratorState::First, 
ParentEdge::GraphPosition(_), ParentEdge::GraphPosition(_)) => (2, Some(2)), + (ParentIteratorState::First, ParentEdge::GraphPosition(_), ParentEdge::ExtraEdgeIndex(_)) => (2, None), // first parent, then at least one more item: an extra edge or an error + (ParentIteratorState::First, ParentEdge::ExtraEdgeIndex(_), _) => (1, Some(1)), + (ParentIteratorState::Second, _, ParentEdge::None) => (0, Some(0)), + (ParentIteratorState::Second, _, ParentEdge::GraphPosition(_)) => (1, Some(1)), + (ParentIteratorState::Second, _, ParentEdge::ExtraEdgeIndex(_)) => (1, None), // at least one more item: an extra edge or an error + (ParentIteratorState::Extra(_), _, _) => (1, None), + (ParentIteratorState::Exhausted, _, _) => (0, Some(0)), + } + } +} + +#[derive(Debug)] +enum ParentIteratorState<'a> { + First, + Second, + Extra(Chunks<'a, u8>), + Exhausted, +} + +#[derive(Clone, Copy, Debug)] +enum ParentEdge { + None, + GraphPosition(graph::Position), + ExtraEdgeIndex(u32), +} + +impl ParentEdge { + pub fn from_raw(raw: u32) -> ParentEdge { + if raw == NO_PARENT { + return ParentEdge::None; + } + if raw & EXTENDED_EDGES_MASK != 0 { + ParentEdge::ExtraEdgeIndex(raw & !EXTENDED_EDGES_MASK) + } else { + ParentEdge::GraphPosition(graph::Position(raw)) + } + } +} + +const LAST_EXTENDED_EDGE_MASK: u32 = 0x8000_0000; + +enum ExtraEdge { + Internal(graph::Position), + Last(graph::Position), +} + +impl ExtraEdge { + pub fn from_raw(raw: u32) -> Self { + if raw & LAST_EXTENDED_EDGE_MASK != 0 { + Self::Last(graph::Position(raw & !LAST_EXTENDED_EDGE_MASK)) + } else { + Self::Internal(graph::Position(raw)) + } + } +} diff --git a/git-commitgraph/src/file/init.rs b/git-commitgraph/src/file/init.rs new file mode 100644 index 00000000000..a31e5338c1e --- /dev/null +++ b/git-commitgraph/src/file/init.rs @@ -0,0 +1,315 @@ +use crate::file::{File, COMMIT_DATA_ENTRY_SIZE, FAN_LEN, SIGNATURE}; +use bstr::ByteSlice; +use byteorder::{BigEndian, ByteOrder}; +use filebuffer::FileBuffer; +use git_object::SHA1_SIZE; +use quick_error::quick_error; +use std::{ + convert::{TryFrom, TryInto}, + ops::Range, + path::Path, +}; + +type 
ChunkId = [u8; 4]; + +quick_error! { + #[derive(Debug)] + pub enum Error { + BaseGraphMismatch(from_header: u8, from_chunk: u32) { + display( + "Commit-graph {} chunk contains {} base graphs, but commit-graph file header claims {} base graphs", + BASE_GRAPHS_LIST_CHUNK_ID.as_bstr(), + from_chunk, + from_header, + ) + } + CommitCountMismatch(chunk1_id: ChunkId, chunk1_commits: u32, chunk2_id: ChunkId, chunk2_commits: u32) { + display( + "Commit-graph {:?} chunk contains {} commits, but {:?} chunk contains {} commits", + chunk1_id.as_bstr(), + chunk1_commits, + chunk2_id.as_bstr(), + chunk2_commits, + ) + } + Corrupt(msg: String) { + display("{}", msg) + } + DuplicateChunk(id: ChunkId) { + display("Commit-graph file contains multiple {:?} chunks", id.as_bstr()) + } + // This error case is disabled, as git allows extra garbage in the extra edges list. + // ExtraEdgesOverflow { + // display("The last entry in commit-graph's extended edges list does is not marked as being terminal") + // } + InvalidChunkSize(id: ChunkId, msg: String) { + display("Commit-graph chunk {:?} has invalid size: {}", id.as_bstr(), msg) + } + Io(err: std::io::Error, path: std::path::PathBuf) { + display("Could not open commit-graph file at '{}'", path.display()) + source(err) + } + MissingChunk(id: ChunkId) { + display("Missing required chunk {:?}", id.as_bstr()) + } + UnsupportedHashVersion(version: u8) { + display("Commit-graph file uses unsupported hash version: {}", version) + } + UnsupportedVersion(version: u8) { + display("Unsupported commit-graph file version: {}", version) + } + } +} + +const CHUNK_LOOKUP_SIZE: usize = 12; +const HEADER_LEN: usize = 8; +const MIN_FILE_SIZE: usize = HEADER_LEN + ((MIN_CHUNKS + 1) * CHUNK_LOOKUP_SIZE); +const OID_LOOKUP_ENTRY_SIZE: usize = SHA1_SIZE; + +// Required chunks: OIDF, OIDL, CDAT +const MIN_CHUNKS: usize = 3; +const BASE_GRAPHS_LIST_CHUNK_ID: ChunkId = *b"BASE"; +const COMMIT_DATA_CHUNK_ID: ChunkId = *b"CDAT"; +const EXTENDED_EDGES_LIST_CHUNK_ID: 
ChunkId = *b"EDGE"; +const OID_FAN_CHUNK_ID: ChunkId = *b"OIDF"; +const OID_LOOKUP_CHUNK_ID: ChunkId = *b"OIDL"; +const SENTINEL_CHUNK_ID: ChunkId = [0u8; 4]; + +impl File { + pub fn at(path: impl AsRef) -> Result { + Self::try_from(path.as_ref()) + } +} + +impl TryFrom<&Path> for File { + type Error = Error; + + fn try_from(path: &Path) -> Result { + let data = FileBuffer::open(path).map_err(|e| Error::Io(e, path.to_owned()))?; + let data_size = data.len(); + if data_size < MIN_FILE_SIZE { + return Err(Error::Corrupt( + "Commit-graph file too small even for an empty graph".to_owned(), + )); + } + + let mut ofs = 0; + if &data[ofs..ofs + SIGNATURE.len()] != SIGNATURE { + return Err(Error::Corrupt( + "Commit-graph file does not start with expected signature".to_owned(), + )); + } + ofs += SIGNATURE.len(); + + match data[ofs] { + 1 => (), + x => { + return Err(Error::UnsupportedVersion(x)); + } + }; + ofs += 1; + + match data[ofs] { + 1 => (), + x => { + return Err(Error::UnsupportedHashVersion(x)); + } + }; + ofs += 1; + + let chunk_count = data[ofs]; + // Can assert chunk_count >= MIN_CHUNKS here, but later OIDF+OIDL+CDAT presence checks make + // it redundant. 
+ ofs += 1; + + let base_graph_count = data[ofs]; + ofs += 1; + + let chunk_lookup_end = ofs + ((chunk_count as usize + 1) * CHUNK_LOOKUP_SIZE); + if chunk_lookup_end > data_size { + return Err(Error::Corrupt(format!( + "Commit-graph file is too small to hold {} chunks", + chunk_count + ))); + } + + let mut base_graphs_list_offset: Option = None; + let mut commit_data_offset: Option = None; + let mut commit_data_count = 0u32; + let mut extra_edges_list_range: Option> = None; + let mut fan_offset: Option = None; + let mut oid_lookup_offset: Option = None; + let mut oid_lookup_count = 0u32; + + let mut chunk_id: ChunkId = data[ofs..ofs + 4].try_into().expect("ChunkId to accept 4 bytes"); + ofs += 4; + + let mut chunk_offset: usize = BigEndian::read_u64(&data[ofs..ofs + 8]) + .try_into() + .expect("an offset small enough to fit a usize"); + if chunk_offset < chunk_lookup_end { + return Err(Error::Corrupt(format!( + "Commit-graph chunk 0 has invalid offset {} (must be at least {})", + chunk_offset, chunk_lookup_end + ))); + } + ofs += 8; + + for _ in 0..chunk_count { + let next_chunk_id: ChunkId = data[ofs..ofs + 4].try_into().expect("ChunkId to accept 4 bytes"); + ofs += 4; + + let next_chunk_offset: usize = BigEndian::read_u64(&data[ofs..ofs + 8]) + .try_into() + .expect("an offset small enough to fit a usize"); + ofs += 8; + + let chunk_size: usize = next_chunk_offset + .checked_sub(chunk_offset) + .ok_or_else(|| Error::InvalidChunkSize(chunk_id, "size is negative".to_string()))?; + if next_chunk_offset >= data_size { + return Err(Error::InvalidChunkSize( + chunk_id, + "chunk extends beyond end of file".to_string(), + )); + } + + match chunk_id { + BASE_GRAPHS_LIST_CHUNK_ID => { + if base_graphs_list_offset.is_some() { + return Err(Error::DuplicateChunk(chunk_id)); + } + if chunk_size % SHA1_SIZE != 0 { + return Err(Error::InvalidChunkSize( + chunk_id, + format!("chunk size {} is not a multiple of {}", chunk_size, SHA1_SIZE), + )); + } + let chunk_base_graph_count = 
(chunk_size / SHA1_SIZE) as u32; + if chunk_base_graph_count != base_graph_count as u32 { + return Err(Error::BaseGraphMismatch(base_graph_count, chunk_base_graph_count)); + } + base_graphs_list_offset = Some(chunk_offset); + } + COMMIT_DATA_CHUNK_ID => { + if commit_data_offset.is_some() { + return Err(Error::DuplicateChunk(chunk_id)); + } + if chunk_size % COMMIT_DATA_ENTRY_SIZE != 0 { + return Err(Error::InvalidChunkSize( + chunk_id, + format!( + "chunk size {} is not a multiple of {}", + chunk_size, COMMIT_DATA_ENTRY_SIZE + ), + )); + } + commit_data_offset = Some(chunk_offset); + commit_data_count = (chunk_size / COMMIT_DATA_ENTRY_SIZE) as u32; + } + EXTENDED_EDGES_LIST_CHUNK_ID => { + if extra_edges_list_range.is_some() { + return Err(Error::DuplicateChunk(chunk_id)); + } + + extra_edges_list_range = Some(Range { + start: chunk_offset, + end: next_chunk_offset, + }) + } + OID_FAN_CHUNK_ID => { + if fan_offset.is_some() { + return Err(Error::DuplicateChunk(chunk_id)); + } + let expected_size = 4 * FAN_LEN; + if chunk_size != expected_size { + return Err(Error::InvalidChunkSize( + chunk_id, + format!("expected chunk length {}, got {}", expected_size, chunk_size), + )); + } + fan_offset = Some(chunk_offset); + } + OID_LOOKUP_CHUNK_ID => { + if oid_lookup_offset.is_some() { + return Err(Error::DuplicateChunk(chunk_id)); + } + if chunk_size % OID_LOOKUP_ENTRY_SIZE != 0 { + return Err(Error::InvalidChunkSize( + chunk_id, + format!( + "chunk size {} is not a multiple of {}", + chunk_size, OID_LOOKUP_ENTRY_SIZE + ), + )); + } + oid_lookup_offset = Some(chunk_offset); + oid_lookup_count = (chunk_size / OID_LOOKUP_ENTRY_SIZE) as u32; + // TODO(ST): Figure out how to handle this. Don't know what to do with the commented code. + // git allows extra garbage in the extra edges list chunk? 
+ // if oid_lookup_count > 0 { + // let last_edge = &data[next_chunk_offset - 4..next_chunk_offset]; + // let last_edge = BigEndian::read_u32(last_edge); + // if let ExtraEdge::Internal(_) = ExtraEdge::from_raw(last_edge) { + // return Err(Error::ExtraEdgesListOverflow); + // } + // } + } + _ => {} + } + + chunk_id = next_chunk_id; + chunk_offset = next_chunk_offset; + } + if chunk_id != SENTINEL_CHUNK_ID { + return Err(Error::Corrupt(format!( + "Commit-graph file has invalid last chunk ID: {:?}", + chunk_id.as_bstr() + ))); + } + + let fan_offset = fan_offset.ok_or_else(|| Error::MissingChunk(OID_FAN_CHUNK_ID))?; + let oid_lookup_offset = oid_lookup_offset.ok_or_else(|| Error::MissingChunk(OID_LOOKUP_CHUNK_ID))?; + let commit_data_offset = commit_data_offset.ok_or_else(|| Error::MissingChunk(COMMIT_DATA_CHUNK_ID))?; + if base_graph_count > 0 && base_graphs_list_offset == None { + return Err(Error::MissingChunk(BASE_GRAPHS_LIST_CHUNK_ID)); + } + + let (fan, _) = read_fan(&data[fan_offset..]); + if oid_lookup_count != fan[255] { + return Err(Error::CommitCountMismatch( + OID_FAN_CHUNK_ID, + fan[255], + OID_LOOKUP_CHUNK_ID, + oid_lookup_count, + )); + } + if commit_data_count != fan[255] { + return Err(Error::CommitCountMismatch( + OID_FAN_CHUNK_ID, + fan[255], + COMMIT_DATA_CHUNK_ID, + commit_data_count, + )); + } + Ok(File { + base_graph_count, + base_graphs_list_offset, + commit_data_offset, + data, + extra_edges_list_range, + fan, + oid_lookup_offset, + path: path.to_owned(), + }) + } +} + +// Copied from git-odb/pack/index/init.rs +fn read_fan(d: &[u8]) -> ([u32; FAN_LEN], usize) { + let mut fan = [0; FAN_LEN]; + for (c, f) in d.chunks(4).zip(fan.iter_mut()) { + *f = BigEndian::read_u32(c); + } + (fan, FAN_LEN * 4) +} diff --git a/git-commitgraph/src/file/mod.rs b/git-commitgraph/src/file/mod.rs new file mode 100644 index 00000000000..3441390e91e --- /dev/null +++ b/git-commitgraph/src/file/mod.rs @@ -0,0 +1,51 @@ +//! Operations on a single commit-graph file. 
+mod access; +pub mod commit; + +mod init; +pub use init::Error; + +pub use commit::Commit; +use filebuffer::FileBuffer; +use git_object::SHA1_SIZE; +use std::{ + fmt::{Display, Formatter}, + ops::Range, + path::PathBuf, +}; + +const COMMIT_DATA_ENTRY_SIZE: usize = SHA1_SIZE + 16; +const FAN_LEN: usize = 256; +const SIGNATURE: &[u8] = b"CGPH"; + +/// A single commit-graph file. +/// +/// All operations on a `File` are local to that graph file. Since a commit graph can span multiple +/// files, all interesting graph operations belong on `Graph`. +pub struct File { + base_graph_count: u8, + base_graphs_list_offset: Option, + commit_data_offset: usize, + data: FileBuffer, + extra_edges_list_range: Option>, + fan: [u32; FAN_LEN], + oid_lookup_offset: usize, + path: PathBuf, +} + +/// The position of a given commit within a graph file, starting at 0. +/// +/// Commits within a graph file are sorted in lexicographical order by OID; a commit's lexicographical position +/// is its position in this ordering. If a commit graph spans multiple files, each file's commits +/// start at lexicographical position 0, so it is unique across a single file but is not unique across +/// the whole commit graph. Each commit also has a graph position (`graph::Position`), which is unique +/// across the whole commit graph. In order to avoid accidentally mixing lexicographical positions with graph +/// positions, distinct types are used for each. 
+#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct Position(pub u32); + +impl Display for Position { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} diff --git a/git-commitgraph/src/graph/access.rs b/git-commitgraph/src/graph/access.rs new file mode 100644 index 00000000000..4a3a7616c2f --- /dev/null +++ b/git-commitgraph/src/graph/access.rs @@ -0,0 +1,88 @@ +use crate::{ + file::{self, Commit, File}, + graph::{self, Graph}, +}; +use git_object::borrowed; + +/// Access +impl Graph { + pub fn commit_at(&self, pos: graph::Position) -> Commit<'_> { + let r = self.lookup_by_pos(pos); + r.file.commit_at(r.pos) + } + + pub fn commit_by_id(&self, id: borrowed::Id<'_>) -> Option> { + let r = self.lookup_by_id(id)?; + Some(r.file.commit_at(r.file_pos)) + } + + pub fn id_at(&self, pos: graph::Position) -> borrowed::Id<'_> { + let r = self.lookup_by_pos(pos); + r.file.id_at(r.pos) + } + + /// Iterate over commits in unsorted order. + pub fn iter_commits(&self) -> impl Iterator> { + self.files.iter().flat_map(|file| file.iter_commits()) + } + + /// Iterate over commit IDs in unsorted order. 
+ pub fn iter_ids(&self) -> impl Iterator> { + self.files.iter().flat_map(|file| file.iter_ids()) + } + + pub fn lookup(&self, id: borrowed::Id<'_>) -> Option { + Some(self.lookup_by_id(id)?.graph_pos) + } + + pub fn num_commits(&self) -> u32 { + self.files.iter().map(|f| f.num_commits()).sum() + } +} + +/// Access fundamentals +impl Graph { + fn lookup_by_id(&self, id: borrowed::Id<'_>) -> Option> { + let mut current_file_start = 0; + for file in &self.files { + if let Some(lex_pos) = file.lookup(id) { + return Some(LookupByIdResult { + file, + file_pos: lex_pos, + graph_pos: graph::Position(current_file_start + lex_pos.0), + }); + } + current_file_start += file.num_commits(); + } + None + } + + fn lookup_by_pos(&self, pos: graph::Position) -> LookupByPositionResult<'_> { + let mut remaining = pos.0; + for file in &self.files { + match remaining.checked_sub(file.num_commits()) { + Some(v) => remaining = v, + None => { + return LookupByPositionResult { + file, + pos: file::Position(remaining), + } + } + } + } + panic!("graph position too large: {}", pos.0); + } +} + +#[derive(Clone)] +struct LookupByIdResult<'a> { + pub file: &'a File, + pub graph_pos: graph::Position, + pub file_pos: file::Position, +} + +#[derive(Clone)] +struct LookupByPositionResult<'a> { + pub file: &'a File, + pub pos: file::Position, +} diff --git a/git-commitgraph/src/graph/init.rs b/git-commitgraph/src/graph/init.rs new file mode 100644 index 00000000000..4e6817cd31a --- /dev/null +++ b/git-commitgraph/src/graph/init.rs @@ -0,0 +1,89 @@ +use crate::{ + file::{self, File}, + Graph, MAX_COMMITS, +}; +use git_object::HashKind; +use quick_error::quick_error; +use std::{ + io::{BufRead, BufReader}, + path::{Path, PathBuf}, +}; + +quick_error! 
{ + #[derive(Debug)] + pub enum Error { + File(err: file::Error, path: PathBuf) { + display("{}", path.display()) + source(err) + } + HashVersionMismatch(path1: PathBuf, hash1: HashKind, path2: PathBuf, hash2: HashKind) { + display( + "Commit-graph files mismatch: '{}' uses hash {:?}, but '{}' uses hash {:?}", + path1.display(), + hash1, + path2.display(), + hash2, + ) + } + Io(err: std::io::Error, path: PathBuf) { + display("Could not open commit-graph file at '{}'", path.display()) + source(err) + } + TooManyCommits(num_commits: u64) { + display( + "Commit-graph files contain {} commits altogether, but only {} commits are allowed", + num_commits, + MAX_COMMITS, + ) + } + } +} + +/// Instantiate a `Graph` from various sources +impl Graph { + pub fn from_info_dir(info_dir: impl AsRef) -> Result { + Self::from_single_file(info_dir.as_ref()) + .or_else(|_| Self::from_split_chain(info_dir.as_ref().join("commit-graphs"))) + } + + pub fn from_single_file(info_dir: impl AsRef) -> Result { + let single_graph_file = info_dir.as_ref().join("commit-graph"); + let file = File::at(&single_graph_file).map_err(|e| Error::File(e, single_graph_file.clone()))?; + Self::new(vec![file]) + } + + pub fn from_split_chain(commit_graphs_dir: impl AsRef) -> Result { + let commit_graphs_dir = commit_graphs_dir.as_ref(); + let chain_file_path = commit_graphs_dir.join("commit-graph-chain"); + let chain_file = std::fs::File::open(&chain_file_path).map_err(|e| Error::Io(e, chain_file_path.clone()))?; + let mut files = Vec::new(); + for line in BufReader::new(chain_file).lines() { + let hash = line.map_err(|e| Error::Io(e, chain_file_path.clone()))?; + let graph_file_path = commit_graphs_dir.join(format!("graph-{}.graph", hash)); + files.push(File::at(&graph_file_path).map_err(|e| Error::File(e, graph_file_path.clone()))?); + } + Self::new(files) + } + + pub fn new(files: Vec) -> Result { + let num_commits: u64 = files.iter().map(|f| f.num_commits() as u64).sum(); + if num_commits > MAX_COMMITS 
as u64 { + return Err(Error::TooManyCommits(num_commits)); + } + + for window in files.windows(2) { + let f1 = &window[0]; + let f2 = &window[1]; + if f1.hash_kind() != f2.hash_kind() { + return Err(Error::HashVersionMismatch( + f1.path().to_owned(), + f1.hash_kind(), + f2.path().to_owned(), + f2.hash_kind(), + )); + } + } + + Ok(Self { files }) + } +} diff --git a/git-commitgraph/src/graph/mod.rs b/git-commitgraph/src/graph/mod.rs new file mode 100644 index 00000000000..e113d7317fa --- /dev/null +++ b/git-commitgraph/src/graph/mod.rs @@ -0,0 +1,24 @@ +//! Operations on a complete commit graph. +mod access; +mod init; + +use crate::file::File; +use std::fmt; + +/// A complete commit graph. +/// +/// The data in the commit graph may come from a monolithic `objects/info/commit-graph` file, or it +/// may come from one or more `objects/info/commit-graphs/graph-*.graph` files. These files are +/// generated via `git commit-graph write ...` commands. +pub struct Graph { + files: Vec, +} + +#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd, Hash)] +pub struct Position(pub u32); + +impl fmt::Display for Position { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.fmt(f) + } +} diff --git a/git-commitgraph/src/lib.rs b/git-commitgraph/src/lib.rs index d7a83e4f525..9295094e53e 100644 --- a/git-commitgraph/src/lib.rs +++ b/git-commitgraph/src/lib.rs @@ -1 +1,10 @@ -#![forbid(unsafe_code, rust_2018_idioms)] +#![forbid(unsafe_code)] +#![deny(rust_2018_idioms)] + +pub mod file; +pub mod graph; + +pub use graph::Graph; + +/// The maximum number of commits that can be stored in a commit graph. 
+pub const MAX_COMMITS: u32 = (1 << 30) + (1 << 29) + (1 << 28) - 1; diff --git a/git-commitgraph/tests/access/mod.rs b/git-commitgraph/tests/access/mod.rs new file mode 100644 index 00000000000..85a410d7725 --- /dev/null +++ b/git-commitgraph/tests/access/mod.rs @@ -0,0 +1,70 @@ +use crate::{check_common, create_repo, inspect_refs}; +use git_commitgraph::Graph; + +#[test] +fn single_parent() -> crate::Result { + let repo_dir = create_repo("single_parent.sh"); + let refs = inspect_refs(repo_dir.path(), &["parent", "child"]); + let cg = Graph::from_info_dir(repo_dir.path().join(".git").join("objects").join("info"))?; + check_common(&cg, &refs); + + assert_eq!(cg.commit_at(refs["parent"].pos()).generation(), 1); + assert_eq!(cg.commit_at(refs["child"].pos()).generation(), 2); + + Ok(()) +} + +#[test] +fn octupus_merges() -> crate::Result { + let repo_dir = create_repo("octopus_merges.sh"); + let refs = inspect_refs( + repo_dir.path(), + &[ + "root", + "parent1", + "parent2", + "parent3", + "parent4", + "three_parents", + "four_parents", + ], + ); + let cg = Graph::from_info_dir(repo_dir.path().join(".git").join("objects").join("info"))?; + check_common(&cg, &refs); + + assert_eq!(cg.commit_at(refs["root"].pos()).generation(), 1); + assert_eq!(cg.commit_at(refs["parent1"].pos()).generation(), 2); + assert_eq!(cg.commit_at(refs["parent2"].pos()).generation(), 2); + assert_eq!(cg.commit_at(refs["parent3"].pos()).generation(), 2); + assert_eq!(cg.commit_at(refs["parent4"].pos()).generation(), 2); + assert_eq!(cg.commit_at(refs["three_parents"].pos()).generation(), 3); + assert_eq!(cg.commit_at(refs["four_parents"].pos()).generation(), 3); + + Ok(()) +} + +#[test] +fn single_commit() -> crate::Result { + let repo_dir = create_repo("single_commit.sh"); + let refs = inspect_refs(repo_dir.path(), &["commit"]); + let cg = Graph::from_info_dir(repo_dir.path().join(".git").join("objects").join("info"))?; + check_common(&cg, &refs); + + 
assert_eq!(cg.commit_at(refs["commit"].pos()).generation(), 1); + + Ok(()) +} + +#[test] +fn two_parents() -> crate::Result { + let repo_dir = create_repo("two_parents.sh"); + let refs = inspect_refs(repo_dir.path(), &["parent1", "parent2", "child"]); + let cg = Graph::from_info_dir(repo_dir.path().join(".git").join("objects").join("info"))?; + check_common(&cg, &refs); + + assert_eq!(cg.commit_at(refs["parent1"].pos()).generation(), 1); + assert_eq!(cg.commit_at(refs["parent2"].pos()).generation(), 1); + assert_eq!(cg.commit_at(refs["child"].pos()).generation(), 2); + + Ok(()) +} diff --git a/git-commitgraph/tests/commitgraph.rs b/git-commitgraph/tests/commitgraph.rs new file mode 100644 index 00000000000..508724d9f47 --- /dev/null +++ b/git-commitgraph/tests/commitgraph.rs @@ -0,0 +1,163 @@ +use git_commitgraph::{graph::Position as GraphPosition, Graph}; +use git_object::{borrowed, owned}; +use std::{ + collections::{HashMap, HashSet}, + convert::{TryFrom, TryInto}, + hash::BuildHasher, + io::{BufRead, Cursor}, + path::{Path, PathBuf}, + process::Command, +}; + +type Result = std::result::Result<(), Box>; + +mod access; + +pub fn check_common(cg: &Graph, expected: &HashMap) { + assert_eq!( + usize::try_from(cg.num_commits()).expect("an architecture able to hold 32 bits of integer"), + expected.len() + ); + for ref_info in expected.values() { + assert_eq!(cg.id_at(ref_info.pos()), ref_info.id(), "id_at({})", ref_info.pos()); + assert_eq!( + cg.lookup(ref_info.id()), + Some(ref_info.pos()), + "lookup({})", + ref_info.id() + ); + + let expected_parents: Vec<_> = ref_info + .parent_ids() + .into_iter() + .map(|id| { + expected + .values() + .find(|item| item.id() == id) + .expect("find RefInfo by id") + }) + .collect(); + + let commit = cg.commit_at(ref_info.pos()); + assert_eq!(commit.id(), ref_info.id()); + assert_eq!(commit.root_tree_id(), ref_info.root_tree_id()); + assert_eq!( + commit.parent1().expect("failed to access commit's parent1"), + 
expected_parents.iter().map(|x| x.pos()).next() + ); + assert_eq!( + commit + .iter_parents() + .collect::, _>>() + .expect("failed to access commit's parents"), + expected_parents.iter().map(|x| x.pos()).collect::>() + ); + } + + assert_eq!( + cg.iter_ids().collect::>(), + expected.values().map(|x| x.id()).collect::>() + ); +} + +pub fn create_repo(script_path: &str) -> tempfile::TempDir { + let dir = tempfile::tempdir().expect("failed to create temp dir"); + let status = Command::new("bash") + .arg(fixture_path(script_path)) + .arg(dir.path()) + .env_remove("GIT_DIR") + .status() + .expect("failed to run repo script"); + assert!(status.success(), "repo script failed"); + dir +} + +pub fn fixture_path(path: &str) -> PathBuf { + PathBuf::from("tests").join("fixtures").join(path) +} + +pub fn hex_to_id(hex: &[u8]) -> owned::Id { + owned::Id::from_40_bytes_in_hex(hex).expect("40 bytes hex") +} + +pub struct RefInfo { + id: owned::Id, + parent_ids: Vec, + pos: GraphPosition, + root_tree_id: owned::Id, +} + +impl RefInfo { + pub fn id(&self) -> borrowed::Id { + self.id.to_borrowed() + } + + pub fn pos(&self) -> GraphPosition { + self.pos + } + + pub fn parent_ids(&self) -> impl IntoIterator { + self.parent_ids.iter().map(|x| x.to_borrowed()) + } + + pub fn root_tree_id(&self) -> borrowed::Id { + self.root_tree_id.to_borrowed() + } +} + +pub fn inspect_refs(repo_dir: impl AsRef, refs: &[&'static str]) -> HashMap { + let output = Command::new("git") + .arg("-C") + .arg(repo_dir.as_ref()) + .arg("show") + .arg("--no-patch") + .arg("--pretty=format:%S %H %T %P") + .args(refs) + .arg("--") + .env_remove("GIT_DIR") + .output() + .expect("failed to execute `git show`"); + // Output format, one line per ref: ref name, commit hash, root tree hash, then the parent hashes (if any), space-separated + let mut infos: Vec<_> = Cursor::new(output.stdout) + .lines() + .map(|x| x.expect("failed to read `git show` output")) + .map(|x| { + let parts = x.trim_end().split(' ').collect::>(); + ( + parts[0].to_string(), + owned::Id::from_40_bytes_in_hex(parts[1].as_bytes()).expect("40 bytes
hex"), + owned::Id::from_40_bytes_in_hex(parts[2].as_bytes()).expect("40 bytes hex"), + parts[3..] + .iter() + .map(|x| owned::Id::from_40_bytes_in_hex(x.as_bytes()).expect("40 bytes hex")) + .collect(), + ) + }) + .collect(); + infos.sort_by_key(|x| x.1); + + let get_pos = |id: borrowed::Id| -> GraphPosition { + let pos: u32 = infos + .binary_search_by_key(&id, |x| x.1.to_borrowed()) + .expect("sorted_ids to contain id") + .try_into() + .expect("graph position to fit in u32"); + GraphPosition(pos) + }; + + infos + .iter() + .cloned() + .map(|(name, id, root_tree_id, parent_ids)| { + ( + name, + RefInfo { + id, + parent_ids, + root_tree_id, + pos: get_pos(id.to_borrowed()), + }, + ) + }) + .collect() +} diff --git a/git-commitgraph/tests/fixtures/create_fixtures.sh b/git-commitgraph/tests/fixtures/create_fixtures.sh new file mode 100755 index 00000000000..2f3be3f0b41 --- /dev/null +++ b/git-commitgraph/tests/fixtures/create_fixtures.sh @@ -0,0 +1,34 @@ +#!/bin/bash +set -eu -o pipefail + +parent_dir="$1" +mkdir "$parent_dir" + +script_dir=$(dirname "$(realpath -e "$0")") + +run() { + local target_dir=$parent_dir/$1 + local script=$script_dir/$1.sh + + local temp_dir=$(mktemp -d create_fixtures-XXXXXXXXXX) + trap "rm -rf $temp_dir" EXIT + "$script" "$temp_dir" + cp -dR "$temp_dir/.git/objects/" "$target_dir/" + rm -rf "$temp_dir" + trap - EXIT +} + +run bloom +run bloom_too_large +run octopus_merges +run single_commit +run single_parent +run split_chain +run two_parents + +#"$script_dir"/bloom.sh "$parent_dir/bloom" +#"$script_dir"/bloom_too_large.sh "$parent_dir/bloom_to_large" +#"$script_dir"/octopus_merges.sh "$parent_dir/octopus_merges" +#"$script_dir"/single_commit.sh "$parent_dir/single_commit" +#"$script_dir"/single_parent.sh "$parent_dir/single_parent" +#"$script_dir"/two_parents.sh "$parent_dir/two_parents" diff --git a/git-commitgraph/tests/fixtures/octopus_merges.sh b/git-commitgraph/tests/fixtures/octopus_merges.sh new file mode 100755 index 
00000000000..e4e79a2ee6a --- /dev/null +++ b/git-commitgraph/tests/fixtures/octopus_merges.sh @@ -0,0 +1,38 @@ +#!/bin/bash +set -eu -o pipefail + +export GIT_AUTHOR_DATE="2000-01-01 00:00:00 +0000" +export GIT_AUTHOR_EMAIL=author@example.com +export GIT_AUTHOR_NAME=author +export GIT_COMMITTER_DATE="2000-01-02 00:00:00 +0000" +export GIT_COMMITTER_EMAIL=committer@example.com +export GIT_COMMITTER_NAME=committer + +mkdir -p "$1" +cd "$1" +git init -q +git config commit.gpgsign false + +git checkout -q --orphan root +git commit -q --allow-empty -m root + +git checkout -q -b parent1 root +git commit -q --allow-empty -m parent1 + +git checkout -q -b parent2 root +git commit -q --allow-empty -m parent2 + +git checkout -q -b parent3 root +git commit -q --allow-empty -m parent3 + +git checkout -q -b parent4 root +git commit -q --allow-empty -m parent4 + +git checkout -q -b three_parents parent1 +git merge -q -m three_parents --no-ff parent2 parent3 >/dev/null + +git checkout -q -b four_parents parent2 +git merge -q -m four_parents --no-ff parent1 parent3 parent4 >/dev/null + +git commit-graph write --no-progress --reachable +git repack -adq diff --git a/git-commitgraph/tests/fixtures/single_commit.sh b/git-commitgraph/tests/fixtures/single_commit.sh new file mode 100755 index 00000000000..e463303bfa1 --- /dev/null +++ b/git-commitgraph/tests/fixtures/single_commit.sh @@ -0,0 +1,22 @@ +#!/bin/bash +set -eu -o pipefail + +# The goal with this repo is to have the smallest commit-graph file possible, in the hopes that an +# off-by-one error in the code reading the file would show up here. +export GIT_AUTHOR_DATE="2000-01-01 00:00:00 +0000" +export GIT_AUTHOR_EMAIL=author@example.com +export GIT_AUTHOR_NAME=author +export GIT_COMMITTER_DATE="2000-01-02 00:00:00 +0000" +export GIT_COMMITTER_EMAIL=committer@example.com +export GIT_COMMITTER_NAME=committer + +mkdir -p "$1" +cd "$1" +git init -q +git config commit.gpgsign false + +git checkout -q -b commit +git commit -q --allow-empty -m commit + +git commit-graph write
--no-progress --reachable +git repack -adq diff --git a/git-commitgraph/tests/fixtures/single_parent.sh b/git-commitgraph/tests/fixtures/single_parent.sh new file mode 100755 index 00000000000..4641a07d57b --- /dev/null +++ b/git-commitgraph/tests/fixtures/single_parent.sh @@ -0,0 +1,23 @@ +#!/bin/bash +set -eu -o pipefail + +export GIT_AUTHOR_DATE="2000-01-01 00:00:00 +0000" +export GIT_AUTHOR_EMAIL=author@example.com +export GIT_AUTHOR_NAME=author +export GIT_COMMITTER_DATE="2000-01-02 00:00:00 +0000" +export GIT_COMMITTER_EMAIL=committer@example.com +export GIT_COMMITTER_NAME=committer + +mkdir -p "$1" +cd "$1" +git init -q +git config commit.gpgsign false + +git checkout -q -b parent +git commit -q --allow-empty -m parent + +git checkout -q -b child parent +git commit -q --allow-empty -m child + +git commit-graph write --no-progress --reachable +git repack -adq diff --git a/git-commitgraph/tests/fixtures/split_chain.sh b/git-commitgraph/tests/fixtures/split_chain.sh new file mode 100755 index 00000000000..8696e38dd89 --- /dev/null +++ b/git-commitgraph/tests/fixtures/split_chain.sh @@ -0,0 +1,26 @@ +#!/bin/bash +set -eu -o pipefail + +export GIT_AUTHOR_DATE="2000-01-01 00:00:00 +0000" +export GIT_AUTHOR_EMAIL=author@example.com +export GIT_AUTHOR_NAME=author +export GIT_COMMITTER_DATE="2000-01-02 00:00:00 +0000" +export GIT_COMMITTER_EMAIL=committer@example.com +export GIT_COMMITTER_NAME=committer + +mkdir -p "$1" +cd "$1" +git init -q +git config commit.gpgsign false + +git checkout -q -b commit1 +git commit -q --allow-empty -m commit1 +git checkout -q -b commit2 commit1 +git commit -q --allow-empty -m commit2 +git checkout -q -b commit3 commit2 +git commit -q --allow-empty -m commit3 + +git show-ref -s commit1 | git commit-graph write --no-progress --split=no-merge --stdin-commits +git show-ref -s commit2 | git commit-graph write --no-progress --split=no-merge --stdin-commits +git show-ref -s commit3 | git commit-graph write --no-progress --split=no-merge 
--stdin-commits +git repack -adq diff --git a/git-commitgraph/tests/fixtures/two_parents.sh b/git-commitgraph/tests/fixtures/two_parents.sh new file mode 100755 index 00000000000..119e5e2d3f2 --- /dev/null +++ b/git-commitgraph/tests/fixtures/two_parents.sh @@ -0,0 +1,26 @@ +#!/bin/bash +set -eu -o pipefail + +export GIT_AUTHOR_DATE="2000-01-01 00:00:00 +0000" +export GIT_AUTHOR_EMAIL=author@example.com +export GIT_AUTHOR_NAME=author +export GIT_COMMITTER_DATE="2000-01-02 00:00:00 +0000" +export GIT_COMMITTER_EMAIL=committer@example.com +export GIT_COMMITTER_NAME=committer + +mkdir -p "$1" +cd "$1" +git init -q +git config commit.gpgsign false + +git checkout -q --orphan parent1 +git commit -q --allow-empty -m parent1 + +git checkout -q --orphan parent2 +git commit -q --allow-empty -m parent2 + +git checkout -q -b child parent1 +git merge -q --allow-unrelated-histories --no-ff -m child parent2 >/dev/null + +git commit-graph write --no-progress --reachable +git repack -adq diff --git a/tasks.md b/tasks.md index 68a3a980bed..44f7b103f4f 100644 --- a/tasks.md +++ b/tasks.md @@ -18,6 +18,21 @@ * [x] lookup uses alternates * [x] loose upgrade: jwalk powered iteration behind a feature flag * [ ] full docs +* **git-commitgraph** review + * [x] adjust tests to disable GPG signatures + * [ ] ~~do graph results need a reference to their owning file?~~ + * Yes, as it allows obtaining additional information related to the item in the file itself, like `File::commit_at(…)` + * [ ] feature-toggled support for serde + * [ ] ~~make tests depend on checked-in fixtures, instead of generating them (and depend on git on CI), making it easy to recreate them~~ + * the tests currently rely on calling git, see `inspect_refs(…)` + * **Questions** + * ~~How can `Commit` return Graph positions? It doesn't seem to learn about an offset.~~ + * Parent IDs are indeed specified as graph positions, not file positions, as they may be in previous commit graph files.
+ * What to do with the ['extra-garbage'](https://github.com/Byron/gitoxide/blob/6f90beeb418480f9cd8bb7ae3b5db678b24103cb/git-commitgraph/src/file/init.rs#L248), + some code is commented out. + * **Future Work** + * A plumbing command to extract some value from the current implementation, maybe statistics, or verification + * Application of the command above in a stress test * **git-config** * A complete implementation, writing a the git remote configuration is needed for finalizing the clone * **git-ref**