Skip to content

various improvements, investigate strong ordering #42

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Nov 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ http-reqwest = ["gix/blocking-http-transport-reqwest-rust-tls"]


[dependencies]
gix = { version = "0.54.1", default-features = false, features = ["blocking-network-client", "blob-diff", "revision"] }
gix = { version = "0.55.2", default-features = false, features = ["blocking-network-client", "blob-diff", "revision"] }
serde = { version = "1", features = ["std", "derive"] }
hex = { version = "0.4.3", features = ["serde"] }
smartstring = { version = "1.0.1", features = ["serde"] }
Expand All @@ -49,5 +49,5 @@ hashbrown = { version = "0.14.0", features = ["raw"] }

[dev-dependencies]
gix-testtools = "0.12.0"
crates-index = { version = "2.0.0", default-features = false, features = ["git-performance", "git-https"] }
crates-index = { version = "2.3.0", default-features = false, features = ["git-performance", "git-https"] }
tempdir = "0.3.5"
63 changes: 49 additions & 14 deletions src/index/diff/delegate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,14 @@ use crate::{Change, CrateVersion};
use ahash::{AHashSet, RandomState};
use bstr::BStr;
use hashbrown::raw::RawTable;
use std::hash::Hasher;
use std::ops::Deref;

#[derive(Default)]
pub(crate) struct Delegate {
changes: Vec<Change>,
/// All changes that happen within a file, along the line-number it happens in .
per_file_changes: Vec<(usize, Change)>,
err: Option<Error>,
}

Expand Down Expand Up @@ -65,8 +69,8 @@ impl Delegate {
if let Some(diff) = change.event.diff().transpose()? {
let mut old_lines = AHashSet::with_capacity(1024);
let location = change.location;
for line in diff.old.data.lines() {
old_lines.insert(line);
for (number, line) in diff.old.data.lines().enumerate() {
old_lines.insert(Line(number, line));
}

// A RawTable is used to represent a Checksum -> CrateVersion map
Expand All @@ -75,47 +79,52 @@ impl Delegate {
let mut new_versions = RawTable::with_capacity(old_lines.len().min(1024));
let hasher = RandomState::new();

for line in diff.new.data.lines() {
for (number, line) in diff.new.data.lines().enumerate() {
// first quickly check if the exact same line is already present in this file in that case we don't need to do anything else
if old_lines.remove(line) {
if old_lines.remove(&Line(number, line)) {
continue;
}
// no need to check if the checksum already exists in the hashmap
// as each checksum appear only once
// as each checksum appears only once
let new_version = version_from_json_line(line, location)?;
new_versions.insert(
hasher.hash_one(new_version.checksum),
new_version,
|rehashed| hasher.hash_one(rehashed.checksum),
(number, new_version),
|rehashed| hasher.hash_one(rehashed.1.checksum),
);
}

for line in old_lines.drain() {
let old_version = version_from_json_line(line, location)?;
let old_version = version_from_json_line(&line, location)?;
let new_version = new_versions
.remove_entry(hasher.hash_one(old_version.checksum), |version| {
version.checksum == old_version.checksum
version.1.checksum == old_version.checksum
});
match new_version {
Some(new_version) => {
Some((_, new_version)) => {
let change = match (old_version.yanked, new_version.yanked) {
(true, false) => Change::Unyanked(new_version),
(false, true) => Change::Yanked(new_version),
_ => continue,
};
self.changes.push(change)
self.per_file_changes.push((line.0, change))
}
None => self.changes.push(Change::VersionDeleted(old_version)),
None => self
.per_file_changes
.push((line.0, Change::VersionDeleted(old_version))),
}
}
for version in new_versions.drain() {
for (number, version) in new_versions.drain() {
let change = if version.yanked {
Change::AddedAndYanked(version)
} else {
Change::Added(version)
};
self.changes.push(change);
self.per_file_changes.push((number, change));
}
self.per_file_changes.sort_by_key(|t| t.0);
self.changes
.extend(self.per_file_changes.drain(..).map(|t| t.1));
}
}
}
Expand All @@ -130,6 +139,32 @@ impl Delegate {
}
}

/// A line that assumes there never are equal lines within a file which
/// is the case due to the checksum.
struct Line<'a>(usize, &'a [u8]);

impl std::hash::Hash for Line<'_> {
fn hash<H: Hasher>(&self, state: &mut H) {
self.1.hash(state)
}
}

impl PartialEq<Self> for Line<'_> {
fn eq(&self, other: &Self) -> bool {
self.1.eq(other.1)
}
}

impl Eq for Line<'_> {}

impl<'a> Deref for Line<'a> {
type Target = [u8];

fn deref(&self) -> &Self::Target {
self.1
}
}

fn version_from_json_line(line: &[u8], file_name: &BStr) -> Result<CrateVersion, Error> {
serde_json::from_slice(line).map_err(|err| Error::VersionDecode {
source: err,
Expand Down
36 changes: 25 additions & 11 deletions src/index/diff/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,14 +86,14 @@ impl Index {
)
}

/// Return all `Change`s that are observed between the last time `peek_changes*(…)` was called
/// Return all [`Change`]s that are observed between the last time `peek_changes*(…)` was called
/// and the latest state of the `crates.io` index repository, which is obtained by fetching
/// the remote called `origin` or whatever is configured for the current `HEAD` branch and lastly
/// what it should be based on knowledge about he crates index.
/// The `last_seen_reference()` will not be created or updated.
/// The [`Self::last_seen_reference()`] will not be created or updated.
/// The second field in the returned tuple is the commit object to which the changes were provided.
/// If one would set the `last_seen_reference()` to that object, the effect is exactly the same
/// as if `fetch_changes(…)` had been called.
/// If one would set the [`Self::last_seen_reference()`] to that object, the effect is exactly the same
/// as if [`Self::fetch_changes()`] had been called.
///
/// The `progress` and `should_interrupt` parameters are used to provide progress for fetches and allow
/// these operations to be interrupted gracefully.
Expand Down Expand Up @@ -204,16 +204,19 @@ impl Index {
))
}

/// Similar to `changes()`, but requires `from` and `to` objects to be provided. They may point
/// Similar to [`Self::changes()`], but requires `from` and `to` objects to be provided. They may point
/// to either `Commit`s or `Tree`s.
///
/// # Returns
///
/// A list of atomic changes that were performed on the index
/// between the two revisions.
///
/// # Grouping and Ordering
///
/// The changes are grouped by the crate they belong to.
/// The order of the changes for each crate are **non-deterministic**.
/// The order of crates is also **non-deterministic**.
/// The order of the changes for each crate is **deterministic** as they are ordered by line number, ascending.
/// The order of crates is **non-deterministic**.
///
/// If a specific order is required, the changes must be sorted by the caller.
pub fn changes_between_commits(
Expand All @@ -238,7 +241,7 @@ impl Index {
delegate.into_result()
}

/// Similar to `changes()`, but requires `ancestor_commit` and `current_commit` objects to be provided
/// Similar to [`Self::changes()`], but requires `ancestor_commit` and `current_commit` objects to be provided
/// with `ancestor_commit` being in the ancestry of `current_commit`.
///
/// If the invariants regarding `ancestor_commit` and `current_commit` are not upheld, we fallback
Expand All @@ -250,6 +253,14 @@ impl Index {
/// A list of atomic changes that were performed on the index
/// between the two revisions, but looking at it one commit at a time, along with the `Order`
/// that the changes are actually in in case one of the invariants wasn't met.
///
/// # Grouping and Ordering
///
/// Note that the order of the changes for each crate is **deterministic**, should they happen within one commit,
/// as the ordering is imposed to be by line number, ascending.
/// Typically one commit does not span multiple crates, but if it does, for instance when rollups happen,
/// then the order of crates is also **non-deterministic**.
///
pub fn changes_between_ancestor_commits(
&self,
ancestor_commit: impl Into<gix::hash::ObjectId>,
Expand Down Expand Up @@ -337,10 +348,10 @@ impl Index {
)
}

/// Return all `Change`s that are observed between the last time this method was called
/// Return all [`Change`]s that are observed between the last time this method was called
/// and the latest state of the `crates.io` index repository, which is obtained by fetching
/// the remote called `origin`.
/// The `last_seen_reference()` will be created or adjusted to point to the latest fetched
/// The [`Self::last_seen_reference()`] will be created or adjusted to point to the latest fetched
/// state, which causes this method to have a different result each time it is called.
///
/// The `progress` and `should_interrupt` parameters are used to provide progress for fetches and allow
Expand Down Expand Up @@ -392,8 +403,11 @@ impl Index {
///
/// A list of atomic chanes that were performed on the index
/// between the two revisions.
///
/// # Grouping and Ordering
///
/// The changes are grouped by the crate they belong to.
/// The order of the changes for each crate are **non-deterministic**.
/// The order of the changes for each crate is **deterministic** as they are ordered by line number, ascending.
/// The order of crates is also **non-deterministic**.
///
/// If a specific order is required, the changes must be sorted by the caller.
Expand Down
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
//!
//! Have a look at the real-world usage to learn more about it:
//! [crates-io-cli](https://github.com/Byron/crates-io-cli-rs/blob/b7a39ad8ef68adb81b2d8a7e552cb0a2a73f7d5b/src/main.rs#L62)
#![deny(missing_docs, rust_2018_idioms)]
#![deny(missing_docs, rust_2018_idioms, unsafe_code)]

///
pub mod index;
Expand Down
9 changes: 4 additions & 5 deletions tests/index/changes_between_commits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,12 @@ fn updates_before_yanks_are_picked_up() -> crate::Result {
let repo = index.repository();
let from = repo.rev_parse_single("@^{/updating ansi-color-codec 0.3.11}~1")?;
let to = repo.rev_parse_single("@^{/yanking ansi-color-codec 0.3.5}")?;
let mut changes = index.changes_between_commits(from, to)?;
let changes = index.changes_between_commits(from, to)?;

assert_eq!(changes.len(), 3, "1 update and 2 yanks");
changes.sort_by_key(|change| change.versions()[0].version.clone());
assert_eq!(changes[0].added().expect("first updated").version, "0.3.11");
assert_eq!(changes[1].yanked().expect("second yanked").version, "0.3.4");
assert_eq!(changes[2].yanked().expect("third yanked").version, "0.3.5");
assert_eq!(changes[0].yanked().expect("second yanked").version, "0.3.4");
assert_eq!(changes[1].yanked().expect("third yanked").version, "0.3.5");
assert_eq!(changes[2].added().expect("first updated").version, "0.3.11");

let (mut changes, order) = index.changes_between_ancestor_commits(from, to)?;
assert_eq!(
Expand Down