diff --git a/gix-blame/Cargo.toml b/gix-blame/Cargo.toml index 4417277cfe6..51bc903424c 100644 --- a/gix-blame/Cargo.toml +++ b/gix-blame/Cargo.toml @@ -10,6 +10,11 @@ authors = ["Christoph Rüßler ", "Sebastian Thi edition = "2021" rust-version = "1.82" +[features] +## An experimental use of the v0.2 branch of `imara-diff` to allow trying it out, and for writing tests against it more easily. +## We will decide later how it should actually be exposed. +blob-experimental = ["gix-diff/blob-experimental"] + [dependencies] gix-commitgraph = { version = "^0.30.0", path = "../gix-commitgraph" } gix-revwalk = { version = "^0.23.0", path = "../gix-revwalk" } diff --git a/gix-blame/src/file/function.rs b/gix-blame/src/file/function.rs index 773ac1425cb..9af23e665e6 100644 --- a/gix-blame/src/file/function.rs +++ b/gix-blame/src/file/function.rs @@ -1,4 +1,4 @@ -use std::{num::NonZeroU32, ops::Range}; +use std::num::NonZeroU32; use gix_diff::{blob::intern::TokenSource, tree::Visit}; use gix_hash::ObjectId; @@ -748,6 +748,7 @@ fn tree_diff_with_rewrites_at_file_path( } #[allow(clippy::too_many_arguments)] +#[cfg(not(feature = "blob-experimental"))] fn blob_changes( odb: impl gix_object::Find + gix_object::FindHeader, resource_cache: &mut gix_diff::blob::Platform, @@ -758,6 +759,8 @@ fn blob_changes( diff_algorithm: gix_diff::blob::Algorithm, stats: &mut Statistics, ) -> Result<Vec<Change>, Error> { + use std::ops::Range; + /// Record all [`Change`]s to learn about additions, deletions and unchanged portions of a *Source File*. 
struct ChangeRecorder { last_seen_after_end: u32, @@ -839,6 +842,85 @@ fn blob_changes( Ok(res) } +#[allow(clippy::too_many_arguments)] +#[cfg(feature = "blob-experimental")] +fn blob_changes( + odb: impl gix_object::Find + gix_object::FindHeader, + resource_cache: &mut gix_diff::blob::Platform, + oid: ObjectId, + previous_oid: ObjectId, + file_path: &BStr, + previous_file_path: &BStr, + diff_algorithm: gix_diff::blob::Algorithm, + stats: &mut Statistics, +) -> Result<Vec<Change>, Error> { + use gix_diff::blob::v2::Hunk; + + resource_cache.set_resource( + previous_oid, + gix_object::tree::EntryKind::Blob, + previous_file_path, + gix_diff::blob::ResourceKind::OldOrSource, + &odb, + )?; + resource_cache.set_resource( + oid, + gix_object::tree::EntryKind::Blob, + file_path, + gix_diff::blob::ResourceKind::NewOrDestination, + &odb, + )?; + + let outcome = resource_cache.prepare_diff()?; + let input = gix_diff::blob::v2::InternedInput::new( + outcome.old.data.as_slice().unwrap_or_default(), + outcome.new.data.as_slice().unwrap_or_default(), + ); + + let diff_algorithm: gix_diff::blob::v2::Algorithm = match diff_algorithm { + gix_diff::blob::Algorithm::Histogram => gix_diff::blob::v2::Algorithm::Histogram, + gix_diff::blob::Algorithm::Myers => gix_diff::blob::v2::Algorithm::Myers, + gix_diff::blob::Algorithm::MyersMinimal => gix_diff::blob::v2::Algorithm::MyersMinimal, + }; + let mut diff = gix_diff::blob::v2::Diff::compute(diff_algorithm, &input); + diff.postprocess_lines(&input); + + let mut last_seen_after_end = 0; + let mut changes = diff.hunks().fold(Vec::new(), |mut hunks, hunk| { + let Hunk { before, after } = hunk; + + // This checks for unchanged hunks. 
+ if after.start > last_seen_after_end { + hunks.push(Change::Unchanged(last_seen_after_end..after.start)); + } + + match (!before.is_empty(), !after.is_empty()) { + (_, true) => { + hunks.push(Change::AddedOrReplaced( + after.start..after.end, + before.end - before.start, + )); + } + (true, false) => { + hunks.push(Change::Deleted(after.start, before.end - before.start)); + } + (false, false) => unreachable!("BUG: imara-diff provided a non-change"), + } + + last_seen_after_end = after.end; + + hunks + }); + + let total_number_of_lines = input.after.len() as u32; + if input.after.len() > last_seen_after_end as usize { + changes.push(Change::Unchanged(last_seen_after_end..total_number_of_lines)); + } + + stats.blobs_diffed += 1; + Ok(changes) +} + fn find_path_entry_in_commit( odb: &impl gix_object::Find, commit: &gix_hash::oid, diff --git a/gix-blame/tests/blame.rs b/gix-blame/tests/blame.rs index bd9e1f50c35..3b404a96153 100644 --- a/gix-blame/tests/blame.rs +++ b/gix-blame/tests/blame.rs @@ -292,12 +292,27 @@ mktest!( 3 ); -/// As of 2024-09-24, these tests are expected to fail. +/// As of 2024-09-24, the Myers-related test is expected to fail. Both tests use `imara-diff` 0.1 +/// under the hood. /// /// Context: https://github.com/Byron/gitoxide/pull/1453#issuecomment-2371013904 #[test] #[should_panic = "empty-lines-myers"] -fn diff_disparity() { +#[cfg(not(feature = "blob-experimental"))] +fn diff_disparity_imara_diff_v1() { + diff_disparity_base(); +} + +/// As of 2025-12-07, both algorithms are expected to pass. They use `imara-diff` 0.2 under the hood. 
+/// +/// Context: https://github.com/Byron/gitoxide/pull/1453#issuecomment-2371013904 +#[test] +#[cfg(feature = "blob-experimental")] +fn diff_disparity_imara_diff_v2() { + diff_disparity_base(); +} + +fn diff_disparity_base() { for case in ["empty-lines-myers", "empty-lines-histogram"] { let Fixture { odb, diff --git a/justfile b/justfile index eeb7a76229e..bae0747ef61 100755 --- a/justfile +++ b/justfile @@ -173,6 +173,7 @@ unit-tests: cargo nextest run -p gix-transport --features async-client --no-fail-fast cargo nextest run -p gix-protocol --features blocking-client --no-fail-fast cargo nextest run -p gix-protocol --features async-client --no-fail-fast + cargo nextest run -p gix-blame --features blob-experimental --no-fail-fast cargo nextest run -p gix --no-default-features --no-fail-fast cargo nextest run -p gix --no-default-features --features basic,comfort,max-performance-safe --no-fail-fast cargo nextest run -p gix --no-default-features --features basic,extras,comfort,need-more-recent-msrv --no-fail-fast