Skip to content

Commit a80c7fa

Browse files
committed
Merge branch 'baseline-improvements'
2 parents dfaf1be + 87e49b5 commit a80c7fa

File tree

6 files changed

+299
-56
lines changed

6 files changed

+299
-56
lines changed

.github/workflows/cron.yml

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
name: cron
2+
3+
on:
4+
schedule:
5+
- cron: '0 13,1 * * *'
6+
workflow_dispatch:
7+
8+
jobs:
9+
stress:
10+
runs-on: ubuntu-latest
11+
steps:
12+
- uses: actions/checkout@v3
13+
- uses: Swatinem/rust-cache@v2
14+
- name: baseline
15+
run: baseline-atomic

src/index/diff/mod.rs

+147-7
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,24 @@ use std::sync::atomic::AtomicBool;
66
mod delegate;
77
use delegate::Delegate;
88

9+
/// The order we maintain for the produced changes.
10+
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
11+
pub enum Order {
12+
/// Compare provided trees or commits without applying any other logic, with the order being influenced by
13+
/// factors like hashmaps.
14+
///
15+
/// The benefit is mode is the optimal performance as only one diff is created.
16+
ImplementationDefined,
17+
/// If the provided revisions are commits, single step through the history that connects them to maintain
18+
/// the order in which changes were submitted to the crates-index for all user-defined changes.
19+
///
20+
/// Admin changes are still implementation defined, but typically involve only deletions.
21+
///
22+
/// The shortcomings of this approach is that each pair of commits has to be diffed individually, increasing
23+
/// the amount of work linearly.
24+
AsInCratesIndex,
25+
}
26+
927
/// The error returned by methods dealing with obtaining index changes.
1028
#[derive(Debug, thiserror::Error)]
1129
#[allow(missing_docs)]
@@ -49,9 +67,22 @@ pub enum Error {
4967

5068
/// Find changes without modifying the underling repository
5169
impl Index {
52-
/// As `peek_changes_with_options`, but without the options.
70+
/// As `peek_changes_with_options()`, but without the options.
5371
pub fn peek_changes(&self) -> Result<(Vec<Change>, git::hash::ObjectId), Error> {
54-
self.peek_changes_with_options(git::progress::Discard, &AtomicBool::default())
72+
self.peek_changes_with_options(
73+
git::progress::Discard,
74+
&AtomicBool::default(),
75+
Order::ImplementationDefined,
76+
)
77+
}
78+
79+
/// As `peek_changes()` but provides changes similar to those in the crates index.
80+
pub fn peek_changes_ordered(&self) -> Result<(Vec<Change>, git::hash::ObjectId), Error> {
81+
self.peek_changes_with_options(
82+
git::progress::Discard,
83+
&AtomicBool::default(),
84+
Order::ImplementationDefined,
85+
)
5586
}
5687

5788
/// Return all `Change`s that are observed between the last time `peek_changes*(…)` was called
@@ -63,6 +94,9 @@ impl Index {
6394
/// If one would set the `last_seen_reference()` to that object, the effect is exactly the same
6495
/// as if `fetch_changes(…)` had been called.
6596
///
97+
/// The `progress` and `should_interrupt` parameters are used to provide progress for fetches and allow
98+
/// these operations to be interrupted gracefully.
99+
///
66100
/// # Resource Usage
67101
///
68102
/// As this method fetches the git repository, loose objects or small packs may be created. Over time,
@@ -75,6 +109,7 @@ impl Index {
75109
&self,
76110
progress: P,
77111
should_interrupt: &AtomicBool,
112+
order: Order,
78113
) -> Result<(Vec<Change>, git::hash::ObjectId), Error>
79114
where
80115
P: git::Progress,
@@ -159,15 +194,21 @@ impl Index {
159194
.detach()
160195
};
161196

162-
Ok((self.changes_between_commits(from, to)?, to))
197+
Ok((
198+
match order {
199+
Order::ImplementationDefined => self.changes_between_commits(from, to)?,
200+
Order::AsInCratesIndex => self.changes_between_ancestor_commits(from, to)?.0,
201+
},
202+
to,
203+
))
163204
}
164205

165206
/// Similar to `changes()`, but requires `from` and `to` objects to be provided. They may point
166207
/// to either `Commit`s or `Tree`s.
167208
///
168209
/// # Returns
169210
///
170-
/// A list of atomic chanes that were performed on the index
211+
/// A list of atomic changes that were performed on the index
171212
/// between the two revisions.
172213
/// The changes are grouped by the crate they belong to.
173214
/// The order of the changes for each crate are **non-deterministic**.
@@ -194,13 +235,106 @@ impl Index {
194235
.for_each_to_obtain_tree(&to, |change| delegate.handle(change))?;
195236
delegate.into_result()
196237
}
238+
239+
/// Similar to `changes()`, but requires `ancestor_commit` and `current_commit` objects to be provided
240+
/// with `ancestor_commit` being in the ancestry of `current_commit`.
241+
///
242+
/// If the invariants regarding `ancestor_commit` and `current_commit` are not upheld, we fallback
243+
/// to `changes_between_commits()` which doesn't have such restrictions.
244+
/// This can happen if the crates-index was squashed for instance.
245+
///
246+
/// # Returns
247+
///
248+
/// A list of atomic changes that were performed on the index
249+
/// between the two revisions, but looking at it one commit at a time, along with the `Order`
250+
/// that the changes are actually in in case one of the invariants wasn't met.
251+
pub fn changes_between_ancestor_commits(
252+
&self,
253+
ancestor_commit: impl Into<git::hash::ObjectId>,
254+
current_commit: impl Into<git::hash::ObjectId>,
255+
) -> Result<(Vec<Change>, Order), Error> {
256+
let from_commit = ancestor_commit.into();
257+
let to_commit = current_commit.into();
258+
match self.commit_ancestry(from_commit, to_commit) {
259+
Some(commits) => {
260+
let mut changes = Vec::new();
261+
for from_to in commits.windows(2) {
262+
let from = from_to[0];
263+
let to = from_to[1];
264+
changes.extend(self.changes_between_commits(from, to)?);
265+
}
266+
Ok((changes, Order::AsInCratesIndex))
267+
}
268+
None => self
269+
.changes_between_commits(from_commit, to_commit)
270+
.map(|c| (c, Order::ImplementationDefined)),
271+
}
272+
}
273+
274+
/// Return a list of commits like `from_commit..=to_commits`.
275+
fn commit_ancestry(
276+
&self,
277+
ancestor_commit: git::hash::ObjectId,
278+
current_commit: git::hash::ObjectId,
279+
) -> Option<Vec<git::hash::ObjectId>> {
280+
let time_in_seconds_since_epoch = ancestor_commit
281+
.attach(&self.repo)
282+
.object()
283+
.ok()?
284+
.try_into_commit()
285+
.ok()?
286+
.committer()
287+
.ok()?
288+
.time
289+
.seconds_since_unix_epoch;
290+
let mut commits = current_commit
291+
.attach(&self.repo)
292+
.ancestors()
293+
.sorting(
294+
git::traverse::commit::Sorting::ByCommitTimeNewestFirstCutoffOlderThan {
295+
time_in_seconds_since_epoch,
296+
},
297+
)
298+
.first_parent_only()
299+
.all()
300+
.ok()?
301+
.map(|c| c.map(|c| c.detach()))
302+
.collect::<Result<Vec<_>, _>>()
303+
.ok()?;
304+
305+
commits.reverse();
306+
if *commits.first()? != ancestor_commit {
307+
// try harder, commit resolution is just a second.
308+
let pos = commits.iter().position(|c| *c == ancestor_commit)?;
309+
commits = commits[pos..].into();
310+
}
311+
assert_eq!(
312+
commits[commits.len() - 1],
313+
current_commit,
314+
"the iterator includes the tips"
315+
);
316+
Some(commits)
317+
}
197318
}
198319

199320
/// Find changes while changing the underlying repository in one way or another.
200321
impl Index {
201-
/// As `fetch_changes_with_options`, but without the options.
322+
/// As `fetch_changes_with_options()`, but without the options.
202323
pub fn fetch_changes(&self) -> Result<Vec<Change>, Error> {
203-
self.fetch_changes_with_options(git::progress::Discard, &AtomicBool::default())
324+
self.fetch_changes_with_options(
325+
git::progress::Discard,
326+
&AtomicBool::default(),
327+
Order::ImplementationDefined,
328+
)
329+
}
330+
331+
/// As `fetch_changes()`, but returns an ordered result.
332+
pub fn fetch_changes_ordered(&self) -> Result<Vec<Change>, Error> {
333+
self.fetch_changes_with_options(
334+
git::progress::Discard,
335+
&AtomicBool::default(),
336+
Order::AsInCratesIndex,
337+
)
204338
}
205339

206340
/// Return all `Change`s that are observed between the last time this method was called
@@ -209,6 +343,11 @@ impl Index {
209343
/// The `last_seen_reference()` will be created or adjusted to point to the latest fetched
210344
/// state, which causes this method to have a different result each time it is called.
211345
///
346+
/// The `progress` and `should_interrupt` parameters are used to provide progress for fetches and allow
347+
/// these operations to be interrupted gracefully.
348+
///
349+
/// `order` configures how changes should be ordered.
350+
///
212351
/// # Resource Usage
213352
///
214353
/// As this method fetches the git repository, loose objects or small packs may be created. Over time,
@@ -220,12 +359,13 @@ impl Index {
220359
&self,
221360
progress: P,
222361
should_interrupt: &AtomicBool,
362+
order: Order,
223363
) -> Result<Vec<Change>, Error>
224364
where
225365
P: git::Progress,
226366
P::SubProgress: 'static,
227367
{
228-
let (changes, to) = self.peek_changes_with_options(progress, should_interrupt)?;
368+
let (changes, to) = self.peek_changes_with_options(progress, should_interrupt, order)?;
229369
self.set_last_seen_reference(to)?;
230370
Ok(changes)
231371
}

tests/baseline_atomic.rs

+4-1
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,8 @@ mod shared;
55
#[cfg_attr(debug_assertions, ignore)]
66
#[test]
77
fn one_per_commit() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
8-
shared::baseline(Step::OnePerCommit)
8+
shared::baseline(Step::Realistic {
9+
ordered_partitions: 2,
10+
unordered_partitions: 38,
11+
})
912
}

tests/index/changes_between_commits.rs

+42-4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use crate::index::index_ro;
2+
use crates_index_diff::index::diff::Order;
23
use crates_index_diff::{Change, CrateVersion, Index};
34
use git_repository as git;
45

@@ -13,20 +14,57 @@ fn directory_deletions_are_not_picked_up() -> crate::Result {
1314
Ok(())
1415
}
1516

17+
#[test]
18+
fn ancestor_commits_retain_order() -> crate::Result {
19+
let index = index_ro()?;
20+
let repo = index.repository();
21+
let from = repo.rev_parse_single("@^{/Yanking crate `gitten#0.3.1`}~1")?;
22+
let to = repo.rev_parse_single(":/Yanking crate `gitten#0.3.0`")?;
23+
let (changes, order) = index.changes_between_ancestor_commits(from, to)?;
24+
25+
assert_eq!(order, Order::AsInCratesIndex, "both commits are connected");
26+
assert_eq!(
27+
changes.len(),
28+
2,
29+
"we did specify one more than we needed as the `from` commit would otherwise not be included (hence `~1`)"
30+
);
31+
32+
assert_eq!(
33+
changes[0].yanked().expect("yanked").version,
34+
"0.3.1",
35+
"this goes against ascending order, but is what's recorded in the crates index"
36+
);
37+
38+
assert_eq!(changes[1].yanked().expect("yanked").version, "0.3.0");
39+
Ok(())
40+
}
41+
1642
#[test]
1743
fn updates_before_yanks_are_picked_up() -> crate::Result {
1844
let index = index_ro()?;
1945
let repo = index.repository();
20-
let mut changes = index.changes_between_commits(
21-
repo.rev_parse_single("@^{/updating ansi-color-codec 0.3.11}~1")?,
22-
repo.rev_parse_single("@^{/yanking ansi-color-codec 0.3.5}")?,
23-
)?;
46+
let from = repo.rev_parse_single("@^{/updating ansi-color-codec 0.3.11}~1")?;
47+
let to = repo.rev_parse_single("@^{/yanking ansi-color-codec 0.3.5}")?;
48+
let mut changes = index.changes_between_commits(from, to)?;
2449

2550
assert_eq!(changes.len(), 3, "1 update and 2 yanks");
2651
changes.sort_by_key(|change| change.versions()[0].version.clone());
52+
assert_eq!(changes[0].added().expect("first updated").version, "0.3.11");
2753
assert_eq!(changes[1].yanked().expect("second yanked").version, "0.3.4");
2854
assert_eq!(changes[2].yanked().expect("third yanked").version, "0.3.5");
55+
56+
let (mut changes, order) = index.changes_between_ancestor_commits(from, to)?;
57+
assert_eq!(
58+
order,
59+
Order::AsInCratesIndex,
60+
"we provided commits, so ancestry should pan out"
61+
);
62+
63+
assert_eq!(changes.len(), 3, "1 update and 2 yanks");
64+
changes.sort_by_key(|change| change.versions()[0].version.clone());
2965
assert_eq!(changes[0].added().expect("first updated").version, "0.3.11");
66+
assert_eq!(changes[1].yanked().expect("second yanked").version, "0.3.4");
67+
assert_eq!(changes[2].yanked().expect("third yanked").version, "0.3.5");
3068
Ok(())
3169
}
3270

tests/index/mod.rs

+29-23
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use crates_index_diff::index::diff::Order;
12
use crates_index_diff::Index;
23
use git_repository as git;
34
use git_repository::refs::transaction::PreviousValue;
@@ -13,30 +14,35 @@ const NUM_CHANGES_SINCE_EVER: usize = 3523;
1314
fn peek_changes() -> crate::Result {
1415
let mut index = index_ro()?;
1516
index.branch_name = "main";
16-
assert!(
17-
index.last_seen_reference().is_err(),
18-
"marker ref doesn't exist"
19-
);
20-
let (changes, last_seen_revision) =
21-
index.peek_changes_with_options(git::progress::Discard, &AtomicBool::default())?;
22-
assert_eq!(
23-
changes.len(),
24-
NUM_CHANGES_SINCE_EVER,
25-
"all changes since the beginning of history"
26-
);
17+
for order in [Order::ImplementationDefined, Order::AsInCratesIndex] {
18+
assert!(
19+
index.last_seen_reference().is_err(),
20+
"marker ref doesn't exist"
21+
);
22+
let (changes, last_seen_revision) = index.peek_changes_with_options(
23+
git::progress::Discard,
24+
&AtomicBool::default(),
25+
order,
26+
)?;
27+
assert_eq!(
28+
changes.len(),
29+
NUM_CHANGES_SINCE_EVER,
30+
"all changes since the beginning of history"
31+
);
2732

28-
let origin_main = index
29-
.repository()
30-
.find_reference("refs/remotes/origin/main")?;
31-
assert_eq!(
32-
last_seen_revision,
33-
origin_main.id(),
34-
"last seen reference should the latest state from the clone"
35-
);
36-
assert!(
37-
index.last_seen_reference().is_err(),
38-
"the last-seen reference has not been created"
39-
);
33+
let origin_main = index
34+
.repository()
35+
.find_reference("refs/remotes/origin/main")?;
36+
assert_eq!(
37+
last_seen_revision,
38+
origin_main.id(),
39+
"last seen reference should the latest state from the clone"
40+
);
41+
assert!(
42+
index.last_seen_reference().is_err(),
43+
"the last-seen reference has not been created"
44+
);
45+
}
4046
Ok(())
4147
}
4248

0 commit comments

Comments
 (0)