diff --git a/Cargo.toml b/Cargo.toml index 42c1c2a..d24fdc3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,7 +37,7 @@ http-reqwest = ["gix/blocking-http-transport-reqwest-rust-tls"] [dependencies] -gix = { version = "0.54.1", default-features = false, features = ["blocking-network-client", "blob-diff", "revision"] } +gix = { version = "0.55.2", default-features = false, features = ["blocking-network-client", "blob-diff", "revision"] } serde = { version = "1", features = ["std", "derive"] } hex = { version = "0.4.3", features = ["serde"] } smartstring = { version = "1.0.1", features = ["serde"] } @@ -49,5 +49,5 @@ hashbrown = { version = "0.14.0", features = ["raw"] } [dev-dependencies] gix-testtools = "0.12.0" -crates-index = { version = "2.0.0", default-features = false, features = ["git-performance", "git-https"] } +crates-index = { version = "2.3.0", default-features = false, features = ["git-performance", "git-https"] } tempdir = "0.3.5" diff --git a/src/index/diff/delegate.rs b/src/index/diff/delegate.rs index b0eaa0a..a627d64 100644 --- a/src/index/diff/delegate.rs +++ b/src/index/diff/delegate.rs @@ -3,10 +3,14 @@ use crate::{Change, CrateVersion}; use ahash::{AHashSet, RandomState}; use bstr::BStr; use hashbrown::raw::RawTable; +use std::hash::Hasher; +use std::ops::Deref; #[derive(Default)] pub(crate) struct Delegate { changes: Vec, + /// All changes that happen within a file, along the line-number it happens in . + per_file_changes: Vec<(usize, Change)>, err: Option, } @@ -65,8 +69,8 @@ impl Delegate { if let Some(diff) = change.event.diff().transpose()? { let mut old_lines = AHashSet::with_capacity(1024); let location = change.location; - for line in diff.old.data.lines() { - old_lines.insert(line); + for (number, line) in diff.old.data.lines().enumerate() { + old_lines.insert(Line(number, line)); } // A RawTable is used to represent a Checksum -> CrateVersion map @@ -75,47 +79,52 @@ impl Delegate { let mut new_versions = RawTable::with_capacity(old_lines.len().min(1024)); let hasher = RandomState::new(); - for line in diff.new.data.lines() { + for (number, line) in diff.new.data.lines().enumerate() { // first quickly check if the exact same line is already present in this file in that case we don't need to do anything else - if old_lines.remove(line) { + if old_lines.remove(&Line(number, line)) { continue; } // no need to check if the checksum already exists in the hashmap - // as each checksum appear only once + // as each checksum appears only once let new_version = version_from_json_line(line, location)?; new_versions.insert( hasher.hash_one(new_version.checksum), - new_version, - |rehashed| hasher.hash_one(rehashed.checksum), + (number, new_version), + |rehashed| hasher.hash_one(rehashed.1.checksum), ); } for line in old_lines.drain() { - let old_version = version_from_json_line(line, location)?; + let old_version = version_from_json_line(&line, location)?; let new_version = new_versions .remove_entry(hasher.hash_one(old_version.checksum), |version| { - version.checksum == old_version.checksum + version.1.checksum == old_version.checksum }); match new_version { - Some(new_version) => { + Some((_, new_version)) => { let change = match (old_version.yanked, new_version.yanked) { (true, false) => Change::Unyanked(new_version), (false, true) => Change::Yanked(new_version), _ => continue, }; - self.changes.push(change) + self.per_file_changes.push((line.0, change)) } - None => self.changes.push(Change::VersionDeleted(old_version)), + None => self + .per_file_changes + .push((line.0, Change::VersionDeleted(old_version))), } } - for version in new_versions.drain() { + for (number, version) in new_versions.drain() { let change = if version.yanked { Change::AddedAndYanked(version) } else { Change::Added(version) }; - self.changes.push(change); + self.per_file_changes.push((number, change)); } + self.per_file_changes.sort_by_key(|t| t.0); + self.changes + .extend(self.per_file_changes.drain(..).map(|t| t.1)); } } } @@ -130,6 +139,32 @@ impl Delegate { } } +/// A line that assumes there never are equal lines within a file which +/// is the case due to the checksum. +struct Line<'a>(usize, &'a [u8]); + +impl std::hash::Hash for Line<'_> { + fn hash(&self, state: &mut H) { + self.1.hash(state) + } +} + +impl PartialEq for Line<'_> { + fn eq(&self, other: &Self) -> bool { + self.1.eq(other.1) + } +} + +impl Eq for Line<'_> {} + +impl<'a> Deref for Line<'a> { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + self.1 + } +} + fn version_from_json_line(line: &[u8], file_name: &BStr) -> Result { serde_json::from_slice(line).map_err(|err| Error::VersionDecode { source: err, diff --git a/src/index/diff/mod.rs b/src/index/diff/mod.rs index 43c0676..d4ccd58 100644 --- a/src/index/diff/mod.rs +++ b/src/index/diff/mod.rs @@ -86,14 +86,14 @@ impl Index { ) } - /// Return all `Change`s that are observed between the last time `peek_changes*(…)` was called + /// Return all [`Change`]s that are observed between the last time `peek_changes*(…)` was called /// and the latest state of the `crates.io` index repository, which is obtained by fetching /// the remote called `origin` or whatever is configured for the current `HEAD` branch and lastly /// what it should be based on knowledge about he crates index. - /// The `last_seen_reference()` will not be created or updated. + /// The [`Self::last_seen_reference()`] will not be created or updated. /// The second field in the returned tuple is the commit object to which the changes were provided. - /// If one would set the `last_seen_reference()` to that object, the effect is exactly the same - /// as if `fetch_changes(…)` had been called. + /// If one would set the [`Self::last_seen_reference()`] to that object, the effect is exactly the same + /// as if [`Self::fetch_changes()`] had been called. /// /// The `progress` and `should_interrupt` parameters are used to provide progress for fetches and allow /// these operations to be interrupted gracefully. @@ -204,16 +204,19 @@ impl Index { )) } - /// Similar to `changes()`, but requires `from` and `to` objects to be provided. They may point + /// Similar to [`Self::changes()`], but requires `from` and `to` objects to be provided. They may point /// to either `Commit`s or `Tree`s. /// /// # Returns /// /// A list of atomic changes that were performed on the index /// between the two revisions. + /// + /// # Grouping and Ordering + /// /// The changes are grouped by the crate they belong to. - /// The order of the changes for each crate are **non-deterministic**. - /// The order of crates is also **non-deterministic**. + /// The order of the changes for each crate is **deterministic** as they are ordered by line number, ascending. + /// The order of crates is **non-deterministic**. /// /// If a specific order is required, the changes must be sorted by the caller. pub fn changes_between_commits( @@ -238,7 +241,7 @@ impl Index { delegate.into_result() } - /// Similar to `changes()`, but requires `ancestor_commit` and `current_commit` objects to be provided + /// Similar to [`Self::changes()`], but requires `ancestor_commit` and `current_commit` objects to be provided /// with `ancestor_commit` being in the ancestry of `current_commit`. /// /// If the invariants regarding `ancestor_commit` and `current_commit` are not upheld, we fallback @@ -250,6 +253,14 @@ impl Index { /// A list of atomic changes that were performed on the index /// between the two revisions, but looking at it one commit at a time, along with the `Order` /// that the changes are actually in in case one of the invariants wasn't met. + /// + /// # Grouping and Ordering + /// + /// Note that the order of the changes for each crate is **deterministic**, should they happen within one commit, + /// as the ordering is imposed to be by line number, ascending. + /// Typically one commit does not span multiple crates, but if it does, for instance when rollups happen, + /// then the order of crates is also **non-deterministic**. + /// pub fn changes_between_ancestor_commits( &self, ancestor_commit: impl Into, @@ -337,10 +348,10 @@ impl Index { ) } - /// Return all `Change`s that are observed between the last time this method was called + /// Return all [`Change`]s that are observed between the last time this method was called /// and the latest state of the `crates.io` index repository, which is obtained by fetching /// the remote called `origin`. - /// The `last_seen_reference()` will be created or adjusted to point to the latest fetched + /// The [`Self::last_seen_reference()`] will be created or adjusted to point to the latest fetched /// state, which causes this method to have a different result each time it is called. /// /// The `progress` and `should_interrupt` parameters are used to provide progress for fetches and allow @@ -392,8 +403,11 @@ impl Index { /// /// A list of atomic chanes that were performed on the index /// between the two revisions. + /// + /// # Grouping and Ordering + /// /// The changes are grouped by the crate they belong to. - /// The order of the changes for each crate are **non-deterministic**. + /// The order of the changes for each crate is **deterministic** as they are ordered by line number, ascending. /// The order of crates is also **non-deterministic**. /// /// If a specific order is required, the changes must be sorted by the caller. diff --git a/src/lib.rs b/src/lib.rs index 984babd..70bd152 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,7 +2,7 @@ //! //! Have a look at the real-world usage to learn more about it: //! [crates-io-cli](https://github.com/Byron/crates-io-cli-rs/blob/b7a39ad8ef68adb81b2d8a7e552cb0a2a73f7d5b/src/main.rs#L62) -#![deny(missing_docs, rust_2018_idioms)] +#![deny(missing_docs, rust_2018_idioms, unsafe_code)] /// pub mod index; diff --git a/tests/index/changes_between_commits.rs b/tests/index/changes_between_commits.rs index 2befdf0..aa12ecb 100644 --- a/tests/index/changes_between_commits.rs +++ b/tests/index/changes_between_commits.rs @@ -44,13 +44,12 @@ fn updates_before_yanks_are_picked_up() -> crate::Result { let repo = index.repository(); let from = repo.rev_parse_single("@^{/updating ansi-color-codec 0.3.11}~1")?; let to = repo.rev_parse_single("@^{/yanking ansi-color-codec 0.3.5}")?; - let mut changes = index.changes_between_commits(from, to)?; + let changes = index.changes_between_commits(from, to)?; assert_eq!(changes.len(), 3, "1 update and 2 yanks"); - changes.sort_by_key(|change| change.versions()[0].version.clone()); - assert_eq!(changes[0].added().expect("first updated").version, "0.3.11"); - assert_eq!(changes[1].yanked().expect("second yanked").version, "0.3.4"); - assert_eq!(changes[2].yanked().expect("third yanked").version, "0.3.5"); + assert_eq!(changes[0].yanked().expect("second yanked").version, "0.3.4"); + assert_eq!(changes[1].yanked().expect("third yanked").version, "0.3.5"); + assert_eq!(changes[2].added().expect("first updated").version, "0.3.11"); let (mut changes, order) = index.changes_between_ancestor_commits(from, to)?; assert_eq!(