Skip to content

Commit 28f0d1f

Browse files
committed
Merge branch 'improvements'
2 parents 752ba0c + e8b2c90 commit 28f0d1f

File tree

5 files changed

+81
-33
lines changed

5 files changed

+81
-33
lines changed

Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ http-reqwest = ["gix/blocking-http-transport-reqwest-rust-tls"]
3737

3838

3939
[dependencies]
40-
gix = { version = "0.54.1", default-features = false, features = ["blocking-network-client", "blob-diff", "revision"] }
40+
gix = { version = "0.55.2", default-features = false, features = ["blocking-network-client", "blob-diff", "revision"] }
4141
serde = { version = "1", features = ["std", "derive"] }
4242
hex = { version = "0.4.3", features = ["serde"] }
4343
smartstring = { version = "1.0.1", features = ["serde"] }
@@ -49,5 +49,5 @@ hashbrown = { version = "0.14.0", features = ["raw"] }
4949

5050
[dev-dependencies]
5151
gix-testtools = "0.12.0"
52-
crates-index = { version = "2.0.0", default-features = false, features = ["git-performance", "git-https"] }
52+
crates-index = { version = "2.3.0", default-features = false, features = ["git-performance", "git-https"] }
5353
tempdir = "0.3.5"

src/index/diff/delegate.rs

Lines changed: 49 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,14 @@ use crate::{Change, CrateVersion};
33
use ahash::{AHashSet, RandomState};
44
use bstr::BStr;
55
use hashbrown::raw::RawTable;
6+
use std::hash::Hasher;
7+
use std::ops::Deref;
68

79
#[derive(Default)]
810
pub(crate) struct Delegate {
911
changes: Vec<Change>,
12+
/// All changes that happen within a file, along the line-number it happens in .
13+
per_file_changes: Vec<(usize, Change)>,
1014
err: Option<Error>,
1115
}
1216

@@ -65,8 +69,8 @@ impl Delegate {
6569
if let Some(diff) = change.event.diff().transpose()? {
6670
let mut old_lines = AHashSet::with_capacity(1024);
6771
let location = change.location;
68-
for line in diff.old.data.lines() {
69-
old_lines.insert(line);
72+
for (number, line) in diff.old.data.lines().enumerate() {
73+
old_lines.insert(Line(number, line));
7074
}
7175

7276
// A RawTable is used to represent a Checksum -> CrateVersion map
@@ -75,47 +79,52 @@ impl Delegate {
7579
let mut new_versions = RawTable::with_capacity(old_lines.len().min(1024));
7680
let hasher = RandomState::new();
7781

78-
for line in diff.new.data.lines() {
82+
for (number, line) in diff.new.data.lines().enumerate() {
7983
// first quickly check if the exact same line is already present in this file in that case we don't need to do anything else
80-
if old_lines.remove(line) {
84+
if old_lines.remove(&Line(number, line)) {
8185
continue;
8286
}
8387
// no need to check if the checksum already exists in the hashmap
84-
// as each checksum appear only once
88+
// as each checksum appears only once
8589
let new_version = version_from_json_line(line, location)?;
8690
new_versions.insert(
8791
hasher.hash_one(new_version.checksum),
88-
new_version,
89-
|rehashed| hasher.hash_one(rehashed.checksum),
92+
(number, new_version),
93+
|rehashed| hasher.hash_one(rehashed.1.checksum),
9094
);
9195
}
9296

9397
for line in old_lines.drain() {
94-
let old_version = version_from_json_line(line, location)?;
98+
let old_version = version_from_json_line(&line, location)?;
9599
let new_version = new_versions
96100
.remove_entry(hasher.hash_one(old_version.checksum), |version| {
97-
version.checksum == old_version.checksum
101+
version.1.checksum == old_version.checksum
98102
});
99103
match new_version {
100-
Some(new_version) => {
104+
Some((_, new_version)) => {
101105
let change = match (old_version.yanked, new_version.yanked) {
102106
(true, false) => Change::Unyanked(new_version),
103107
(false, true) => Change::Yanked(new_version),
104108
_ => continue,
105109
};
106-
self.changes.push(change)
110+
self.per_file_changes.push((line.0, change))
107111
}
108-
None => self.changes.push(Change::VersionDeleted(old_version)),
112+
None => self
113+
.per_file_changes
114+
.push((line.0, Change::VersionDeleted(old_version))),
109115
}
110116
}
111-
for version in new_versions.drain() {
117+
for (number, version) in new_versions.drain() {
112118
let change = if version.yanked {
113119
Change::AddedAndYanked(version)
114120
} else {
115121
Change::Added(version)
116122
};
117-
self.changes.push(change);
123+
self.per_file_changes.push((number, change));
118124
}
125+
self.per_file_changes.sort_by_key(|t| t.0);
126+
self.changes
127+
.extend(self.per_file_changes.drain(..).map(|t| t.1));
119128
}
120129
}
121130
}
@@ -130,6 +139,32 @@ impl Delegate {
130139
}
131140
}
132141

142+
/// A line that assumes there never are equal lines within a file which
143+
/// is the case due to the checksum.
144+
struct Line<'a>(usize, &'a [u8]);
145+
146+
impl std::hash::Hash for Line<'_> {
147+
fn hash<H: Hasher>(&self, state: &mut H) {
148+
self.1.hash(state)
149+
}
150+
}
151+
152+
impl PartialEq<Self> for Line<'_> {
153+
fn eq(&self, other: &Self) -> bool {
154+
self.1.eq(other.1)
155+
}
156+
}
157+
158+
impl Eq for Line<'_> {}
159+
160+
impl<'a> Deref for Line<'a> {
161+
type Target = [u8];
162+
163+
fn deref(&self) -> &Self::Target {
164+
self.1
165+
}
166+
}
167+
133168
fn version_from_json_line(line: &[u8], file_name: &BStr) -> Result<CrateVersion, Error> {
134169
serde_json::from_slice(line).map_err(|err| Error::VersionDecode {
135170
source: err,

src/index/diff/mod.rs

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -86,14 +86,14 @@ impl Index {
8686
)
8787
}
8888

89-
/// Return all `Change`s that are observed between the last time `peek_changes*(…)` was called
89+
/// Return all [`Change`]s that are observed between the last time `peek_changes*(…)` was called
9090
/// and the latest state of the `crates.io` index repository, which is obtained by fetching
9191
/// the remote called `origin` or whatever is configured for the current `HEAD` branch and lastly
9292
/// what it should be based on knowledge about he crates index.
93-
/// The `last_seen_reference()` will not be created or updated.
93+
/// The [`Self::last_seen_reference()`] will not be created or updated.
9494
/// The second field in the returned tuple is the commit object to which the changes were provided.
95-
/// If one would set the `last_seen_reference()` to that object, the effect is exactly the same
96-
/// as if `fetch_changes(…)` had been called.
95+
/// If one would set the [`Self::last_seen_reference()`] to that object, the effect is exactly the same
96+
/// as if [`Self::fetch_changes()`] had been called.
9797
///
9898
/// The `progress` and `should_interrupt` parameters are used to provide progress for fetches and allow
9999
/// these operations to be interrupted gracefully.
@@ -204,16 +204,19 @@ impl Index {
204204
))
205205
}
206206

207-
/// Similar to `changes()`, but requires `from` and `to` objects to be provided. They may point
207+
/// Similar to [`Self::changes()`], but requires `from` and `to` objects to be provided. They may point
208208
/// to either `Commit`s or `Tree`s.
209209
///
210210
/// # Returns
211211
///
212212
/// A list of atomic changes that were performed on the index
213213
/// between the two revisions.
214+
///
215+
/// # Grouping and Ordering
216+
///
214217
/// The changes are grouped by the crate they belong to.
215-
/// The order of the changes for each crate are **non-deterministic**.
216-
/// The order of crates is also **non-deterministic**.
218+
/// The order of the changes for each crate is **deterministic** as they are ordered by line number, ascending.
219+
/// The order of crates is **non-deterministic**.
217220
///
218221
/// If a specific order is required, the changes must be sorted by the caller.
219222
pub fn changes_between_commits(
@@ -238,7 +241,7 @@ impl Index {
238241
delegate.into_result()
239242
}
240243

241-
/// Similar to `changes()`, but requires `ancestor_commit` and `current_commit` objects to be provided
244+
/// Similar to [`Self::changes()`], but requires `ancestor_commit` and `current_commit` objects to be provided
242245
/// with `ancestor_commit` being in the ancestry of `current_commit`.
243246
///
244247
/// If the invariants regarding `ancestor_commit` and `current_commit` are not upheld, we fallback
@@ -250,6 +253,14 @@ impl Index {
250253
/// A list of atomic changes that were performed on the index
251254
/// between the two revisions, but looking at it one commit at a time, along with the `Order`
252255
/// that the changes are actually in in case one of the invariants wasn't met.
256+
///
257+
/// # Grouping and Ordering
258+
///
259+
/// Note that the order of the changes for each crate is **deterministic**, should they happen within one commit,
260+
/// as the ordering is imposed to be by line number, ascending.
261+
/// Typically one commit does not span multiple crates, but if it does, for instance when rollups happen,
262+
/// then the order of crates is also **non-deterministic**.
263+
///
253264
pub fn changes_between_ancestor_commits(
254265
&self,
255266
ancestor_commit: impl Into<gix::hash::ObjectId>,
@@ -337,10 +348,10 @@ impl Index {
337348
)
338349
}
339350

340-
/// Return all `Change`s that are observed between the last time this method was called
351+
/// Return all [`Change`]s that are observed between the last time this method was called
341352
/// and the latest state of the `crates.io` index repository, which is obtained by fetching
342353
/// the remote called `origin`.
343-
/// The `last_seen_reference()` will be created or adjusted to point to the latest fetched
354+
/// The [`Self::last_seen_reference()`] will be created or adjusted to point to the latest fetched
344355
/// state, which causes this method to have a different result each time it is called.
345356
///
346357
/// The `progress` and `should_interrupt` parameters are used to provide progress for fetches and allow
@@ -392,8 +403,11 @@ impl Index {
392403
///
393404
/// A list of atomic chanes that were performed on the index
394405
/// between the two revisions.
406+
///
407+
/// # Grouping and Ordering
408+
///
395409
/// The changes are grouped by the crate they belong to.
396-
/// The order of the changes for each crate are **non-deterministic**.
410+
/// The order of the changes for each crate is **deterministic** as they are ordered by line number, ascending.
397411
/// The order of crates is also **non-deterministic**.
398412
///
399413
/// If a specific order is required, the changes must be sorted by the caller.

src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
//!
33
//! Have a look at the real-world usage to learn more about it:
44
//! [crates-io-cli](https://github.com/Byron/crates-io-cli-rs/blob/b7a39ad8ef68adb81b2d8a7e552cb0a2a73f7d5b/src/main.rs#L62)
5-
#![deny(missing_docs, rust_2018_idioms)]
5+
#![deny(missing_docs, rust_2018_idioms, unsafe_code)]
66

77
///
88
pub mod index;

tests/index/changes_between_commits.rs

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,12 @@ fn updates_before_yanks_are_picked_up() -> crate::Result {
4444
let repo = index.repository();
4545
let from = repo.rev_parse_single("@^{/updating ansi-color-codec 0.3.11}~1")?;
4646
let to = repo.rev_parse_single("@^{/yanking ansi-color-codec 0.3.5}")?;
47-
let mut changes = index.changes_between_commits(from, to)?;
47+
let changes = index.changes_between_commits(from, to)?;
4848

4949
assert_eq!(changes.len(), 3, "1 update and 2 yanks");
50-
changes.sort_by_key(|change| change.versions()[0].version.clone());
51-
assert_eq!(changes[0].added().expect("first updated").version, "0.3.11");
52-
assert_eq!(changes[1].yanked().expect("second yanked").version, "0.3.4");
53-
assert_eq!(changes[2].yanked().expect("third yanked").version, "0.3.5");
50+
assert_eq!(changes[0].yanked().expect("second yanked").version, "0.3.4");
51+
assert_eq!(changes[1].yanked().expect("third yanked").version, "0.3.5");
52+
assert_eq!(changes[2].added().expect("first updated").version, "0.3.11");
5453

5554
let (mut changes, order) = index.changes_between_ancestor_commits(from, to)?;
5655
assert_eq!(

0 commit comments

Comments
 (0)