diff --git a/gix-diff/tests/diff/blob/slider.rs b/gix-diff/tests/diff/blob/slider.rs index ed3c657dff4..c352ba34a79 100644 --- a/gix-diff/tests/diff/blob/slider.rs +++ b/gix-diff/tests/diff/blob/slider.rs @@ -1,11 +1,14 @@ -use gix_diff::blob::intern::TokenSource; -use gix_diff::blob::unified_diff::ContextSize; -use gix_diff::blob::{Algorithm, UnifiedDiff}; +use gix_diff::blob::Algorithm; +use gix_object::bstr::ByteSlice; use gix_testtools::bstr::{BString, ByteVec}; use pretty_assertions::StrComparison; +use std::ffi::OsStr; +use std::path::Path; #[test] -fn baseline() -> gix_testtools::Result { +fn baseline_v1() -> gix_testtools::Result { + use gix_diff::blob::{unified_diff::ContextSize, UnifiedDiff}; + let worktree_path = gix_testtools::scripted_fixture_read_only_standalone("make_diff_for_sliders_repo.sh")?; let asset_dir = worktree_path.join("assets"); @@ -15,29 +18,9 @@ fn baseline() -> gix_testtools::Result { for entry in dir { let entry = entry?; - let file_name = entry.file_name().into_string().expect("to be string"); - - if !file_name.ends_with(".baseline") { + let Some((file_name, algorithm, old_data, new_data)) = parse_dir_entry(&asset_dir, &entry.file_name()) else { continue; - } - - let parts: Vec<_> = file_name.split('.').collect(); - let [name, algorithm, ..] = parts[..] else { - unreachable!() }; - let algorithm = match algorithm { - "myers" => Algorithm::Myers, - "histogram" => Algorithm::Histogram, - _ => unreachable!(), - }; - - let parts: Vec<_> = name.split('-').collect(); - let [old_blob_id, new_blob_id] = parts[..] 
else { - unreachable!(); - }; - - let old_data = std::fs::read(asset_dir.join(format!("{old_blob_id}.blob")))?; - let new_data = std::fs::read(asset_dir.join(format!("{new_blob_id}.blob")))?; let interner = gix_diff::blob::intern::InternedInput::new( tokens_for_diffing(old_data.as_slice()), @@ -70,17 +53,65 @@ fn baseline() -> gix_testtools::Result { }) .to_string(); - let baseline = baseline - .fold(BString::default(), |mut acc, diff_hunk| { - acc.push_str(diff_hunk.header.to_string().as_str()); - acc.push(b'\n'); + let baseline = baseline.fold_to_unidiff().to_string(); + let actual_matches_baseline = actual == baseline; + diffs.push((actual, baseline, actual_matches_baseline, file_name)); + } - acc.extend_from_slice(&diff_hunk.lines); + if diffs.is_empty() { + eprintln!("Slider baseline isn't setup - look at ./gix-diff/tests/README.md for instructions"); + } - acc - }) + assert_diffs(&diffs); + + Ok(()) +} + +fn tokens_for_diffing(data: &[u8]) -> impl gix_diff::blob::intern::TokenSource { + gix_diff::blob::sources::byte_lines(data) +} + +#[test] +fn baseline_v2() -> gix_testtools::Result { + use gix_diff::blob::v2::{Algorithm, BasicLineDiffPrinter, Diff, InternedInput, UnifiedDiffConfig}; + + let worktree_path = gix_testtools::scripted_fixture_read_only_standalone("make_diff_for_sliders_repo.sh")?; + let asset_dir = worktree_path.join("assets"); + + let dir = std::fs::read_dir(&worktree_path)?; + + let mut diffs = Vec::new(); + + for entry in dir { + let entry = entry?; + let Some((file_name, algorithm, old_data, new_data)) = parse_dir_entry(&asset_dir, &entry.file_name()) else { + continue; + }; + + let input = InternedInput::new(old_data.to_str().unwrap(), new_data.to_str().unwrap()); + let algorithm = match algorithm { + gix_diff::blob::Algorithm::Myers => Algorithm::Myers, + gix_diff::blob::Algorithm::Histogram => Algorithm::Histogram, + gix_diff::blob::Algorithm::MyersMinimal => Algorithm::MyersMinimal, + }; + + let mut diff = Diff::compute(algorithm, 
&input); + diff.postprocess_lines(&input); + + let actual = diff + .unified_diff( + &BasicLineDiffPrinter(&input.interner), + UnifiedDiffConfig::default(), + &input, + ) .to_string(); + let baseline_path = worktree_path.join(&file_name); + let baseline = std::fs::read(baseline_path)?; + let baseline = baseline::Baseline::new(&baseline); + + let baseline = baseline.fold_to_unidiff().to_string(); + let actual_matches_baseline = actual == baseline; diffs.push((actual, baseline, actual_matches_baseline, file_name)); } @@ -89,6 +120,38 @@ fn baseline() -> gix_testtools::Result { eprintln!("Slider baseline isn't setup - look at ./gix-diff/tests/README.md for instructions"); } + assert_diffs(&diffs); + Ok(()) +} + +fn parse_dir_entry(asset_dir: &Path, file_name: &OsStr) -> Option<(String, Algorithm, Vec<u8>, Vec<u8>)> { + let file_name = file_name.to_str().expect("ascii filename").to_owned(); + + if !file_name.ends_with(".baseline") { + return None; + } + + let parts: Vec<_> = file_name.split('.').collect(); + let [name, algorithm, ..] = parts[..] else { + unreachable!("BUG: Need file named '.'") + }; + let algorithm = match algorithm { + "myers" => Algorithm::Myers, + "histogram" => Algorithm::Histogram, + other => unreachable!("'{other}' is not a supported algorithm"), + }; + + let parts: Vec<_> = name.split('-').collect(); + let [old_blob_id, new_blob_id] = parts[..] 
else { + unreachable!("BUG: name part of filename must be -"); + }; + + let old_data = std::fs::read(asset_dir.join(format!("{old_blob_id}.blob"))).unwrap(); + let new_data = std::fs::read(asset_dir.join(format!("{new_blob_id}.blob"))).unwrap(); + (file_name, algorithm, old_data, new_data).into() +} + +fn assert_diffs(diffs: &[(String, String, bool, String)]) { let total_diffs = diffs.len(); let matching_diffs = diffs .iter() @@ -115,16 +178,10 @@ fn baseline() -> gix_testtools::Result { ) } ); - - Ok(()) -} - -fn tokens_for_diffing(data: &[u8]) -> impl TokenSource { - gix_diff::blob::sources::byte_lines(data) } mod baseline { - use gix_object::bstr::ByteSlice; + use gix_object::bstr::{ByteSlice, ByteVec}; use std::iter::Peekable; use gix_diff::blob::unified_diff::{ConsumeHunk, HunkHeader}; @@ -193,6 +250,20 @@ mod baseline { } } + impl Baseline<'_> { + /// Fold all [`DiffHunk`]s we produce into a unified_diff string + pub fn fold_to_unidiff(self) -> BString { + self.fold(BString::default(), |mut acc, diff_hunk| { + acc.push_str(diff_hunk.header.to_string().as_str()); + acc.push(b'\n'); + + acc.extend_from_slice(&diff_hunk.lines); + + acc + }) + } + } + impl Iterator for Baseline<'_> { type Item = DiffHunk;