From 6df2d589338945e663a3a798a60db17db5f805b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= Date: Tue, 25 Mar 2025 12:37:52 +0100 Subject: [PATCH 1/2] Add function for linearizing `BuildStep` substeps --- src/build_helper/src/metrics.rs | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/src/build_helper/src/metrics.rs b/src/build_helper/src/metrics.rs index b6daac32a4412..4ab61245c16b5 100644 --- a/src/build_helper/src/metrics.rs +++ b/src/build_helper/src/metrics.rs @@ -156,25 +156,30 @@ impl BuildStep { child.find_by_type(r#type, result); } } -} - -/// Writes build steps into a nice indented table. -pub fn format_build_steps(root: &BuildStep) -> String { - use std::fmt::Write; - let mut substeps: Vec<(u32, &BuildStep)> = Vec::new(); + /// Returns a Vec with all substeps, ordered by their hierarchical order. + /// The first element of the tuple is the depth of a given step. + fn linearize_steps(&self) -> Vec<(u32, &BuildStep)> { + let mut substeps: Vec<(u32, &BuildStep)> = Vec::new(); - fn visit<'a>(step: &'a BuildStep, level: u32, substeps: &mut Vec<(u32, &'a BuildStep)>) { - substeps.push((level, step)); - for child in &step.children { - visit(child, level + 1, substeps); + fn visit<'a>(step: &'a BuildStep, level: u32, substeps: &mut Vec<(u32, &'a BuildStep)>) { + substeps.push((level, step)); + for child in &step.children { + visit(child, level + 1, substeps); + } } + + visit(self, 0, &mut substeps); + substeps } +} - visit(root, 0, &mut substeps); +/// Writes build steps into a nice indented table. +pub fn format_build_steps(root: &BuildStep) -> String { + use std::fmt::Write; let mut output = String::new(); - for (level, step) in substeps { + for (level, step) in root.linearize_steps() { let label = format!( "{}{}", ".".repeat(level as usize), From 813783e7115c58b5e70034f31be02201dac55407 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= Date: Tue, 25 Mar 2025 13:43:25 +0100 Subject: [PATCH 2/2] Add diff of bootstrap steps --- src/build_helper/src/metrics.rs | 34 ++++++---- src/ci/citool/Cargo.lock | 7 ++ src/ci/citool/Cargo.toml | 1 + src/ci/citool/src/analysis.rs | 114 ++++++++++++++++++++++++++++++-- src/ci/citool/src/main.rs | 46 +++++++------ 5 files changed, 162 insertions(+), 40 deletions(-) diff --git a/src/build_helper/src/metrics.rs b/src/build_helper/src/metrics.rs index 4ab61245c16b5..fdff9cd18cea7 100644 --- a/src/build_helper/src/metrics.rs +++ b/src/build_helper/src/metrics.rs @@ -109,6 +109,8 @@ pub struct BuildStep { pub r#type: String, pub children: Vec, pub duration: Duration, + // Full name of the step, including all parent names + pub full_name: String, } impl BuildStep { @@ -116,7 +118,7 @@ impl BuildStep { /// The most important thing is that the build step aggregates the /// durations of all children, so that it can be easily accessed. pub fn from_invocation(invocation: &JsonInvocation) -> Self { - fn parse(node: &JsonNode) -> Option { + fn parse(node: &JsonNode, parent_name: &str) -> Option { match node { JsonNode::RustbuildStep { type_: kind, @@ -124,11 +126,14 @@ impl BuildStep { duration_excluding_children_sec, .. } => { - let children: Vec<_> = children.into_iter().filter_map(parse).collect(); + let full_name = format!("{parent_name}-{kind}"); + let children: Vec<_> = + children.into_iter().filter_map(|s| parse(s, &full_name)).collect(); let children_duration = children.iter().map(|c| c.duration).sum::(); Some(BuildStep { r#type: kind.to_string(), children, + full_name, duration: children_duration + Duration::from_secs_f64(*duration_excluding_children_sec), }) @@ -138,8 +143,13 @@ impl BuildStep { } let duration = Duration::from_secs_f64(invocation.duration_including_children_sec); - let children: Vec<_> = invocation.children.iter().filter_map(parse).collect(); - Self { r#type: "total".to_string(), children, duration } + + // The root "total" step is kind of a virtual step that encompasses all other steps, + // but it is not a real parent of the other steps. + // We thus start the parent name hierarchy here and use an empty string + // as the parent name of the top-level steps. + let children: Vec<_> = invocation.children.iter().filter_map(|s| parse(s, "")).collect(); + Self { r#type: "total".to_string(), children, duration, full_name: "total".to_string() } } pub fn find_all_by_type(&self, r#type: &str) -> Vec<&Self> { @@ -159,7 +169,7 @@ impl BuildStep { /// Returns a Vec with all substeps, ordered by their hierarchical order. /// The first element of the tuple is the depth of a given step. - fn linearize_steps(&self) -> Vec<(u32, &BuildStep)> { + pub fn linearize_steps(&self) -> Vec<(u32, &BuildStep)> { let mut substeps: Vec<(u32, &BuildStep)> = Vec::new(); fn visit<'a>(step: &'a BuildStep, level: u32, substeps: &mut Vec<(u32, &'a BuildStep)>) { @@ -180,14 +190,14 @@ pub fn format_build_steps(root: &BuildStep) -> String { let mut output = String::new(); for (level, step) in root.linearize_steps() { - let label = format!( - "{}{}", - ".".repeat(level as usize), - // Bootstrap steps can be generic and thus contain angle brackets (<...>). - // However, Markdown interprets these as HTML, so we need to escap ethem. - step.r#type.replace('<', "<").replace('>', ">") - ); + let label = format!("{}{}", ".".repeat(level as usize), escape_step_name(step)); writeln!(output, "{label:.<65}{:>8.2}s", step.duration.as_secs_f64()).unwrap(); } output } + +/// Bootstrap steps can be generic and thus contain angle brackets (<...>). +/// However, Markdown interprets these as HTML, so we need to escap ethem. +pub fn escape_step_name(step: &BuildStep) -> String { + step.r#type.replace('<', "<").replace('>', ">") +} diff --git a/src/ci/citool/Cargo.lock b/src/ci/citool/Cargo.lock index c061ec6ebdcee..800eaae076650 100644 --- a/src/ci/citool/Cargo.lock +++ b/src/ci/citool/Cargo.lock @@ -107,6 +107,7 @@ dependencies = [ "build_helper", "clap", "csv", + "diff", "glob-match", "insta", "serde", @@ -241,6 +242,12 @@ dependencies = [ "powerfmt", ] +[[package]] +name = "diff" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" + [[package]] name = "displaydoc" version = "0.2.5" diff --git a/src/ci/citool/Cargo.toml b/src/ci/citool/Cargo.toml index dde09224afe84..f18436a126359 100644 --- a/src/ci/citool/Cargo.toml +++ b/src/ci/citool/Cargo.toml @@ -7,6 +7,7 @@ edition = "2021" anyhow = "1" clap = { version = "4.5", features = ["derive"] } csv = "1" +diff = "0.1" glob-match = "0.2" serde = { version = "1", features = ["derive"] } serde_yaml = "0.9" diff --git a/src/ci/citool/src/analysis.rs b/src/ci/citool/src/analysis.rs index 2088ce2962097..ccc030511cbbd 100644 --- a/src/ci/citool/src/analysis.rs +++ b/src/ci/citool/src/analysis.rs @@ -1,33 +1,134 @@ use std::collections::{BTreeMap, HashMap, HashSet}; +use std::fmt::Debug; use build_helper::metrics::{ - BuildStep, JsonRoot, TestOutcome, TestSuite, TestSuiteMetadata, format_build_steps, + BuildStep, JsonRoot, TestOutcome, TestSuite, TestSuiteMetadata, escape_step_name, + format_build_steps, }; use crate::metrics; use crate::metrics::{JobMetrics, JobName, get_test_suites}; use crate::utils::{output_details, pluralize}; -pub fn output_bootstrap_stats(metrics: &JsonRoot) { +/// Outputs durations of individual bootstrap steps from the gathered bootstrap invocations, +/// and also a table with summarized information about executed tests. +pub fn output_bootstrap_stats(metrics: &JsonRoot, parent_metrics: Option<&JsonRoot>) { if !metrics.invocations.is_empty() { println!("# Bootstrap steps"); - record_bootstrap_step_durations(&metrics); + record_bootstrap_step_durations(&metrics, parent_metrics); record_test_suites(&metrics); } } -fn record_bootstrap_step_durations(metrics: &JsonRoot) { +fn record_bootstrap_step_durations(metrics: &JsonRoot, parent_metrics: Option<&JsonRoot>) { + let parent_steps: HashMap = parent_metrics + .map(|metrics| { + metrics + .invocations + .iter() + .map(|invocation| { + (invocation.cmdline.clone(), BuildStep::from_invocation(invocation)) + }) + .collect() + }) + .unwrap_or_default(); + for invocation in &metrics.invocations { let step = BuildStep::from_invocation(invocation); let table = format_build_steps(&step); eprintln!("Step `{}`\n{table}\n", invocation.cmdline); - output_details(&invocation.cmdline, || { + output_details(&format!("{} (steps)", invocation.cmdline), || { println!("
{table}
"); }); + + // If there was a parent bootstrap invocation with the same cmdline, diff it + if let Some(parent_step) = parent_steps.get(&invocation.cmdline) { + let table = format_build_step_diffs(&step, parent_step); + + let duration_before = parent_step.duration.as_secs(); + let duration_after = step.duration.as_secs(); + output_details( + &format!("{} (diff) ({duration_before}s -> {duration_after}s)", invocation.cmdline), + || { + println!("{table}"); + }, + ); + } } eprintln!("Recorded {} bootstrap invocation(s)", metrics.invocations.len()); } +/// Creates a table that displays a diff between the durations of steps between +/// two bootstrap invocations. +/// It also shows steps that were missing before/after. +fn format_build_step_diffs(current: &BuildStep, parent: &BuildStep) -> String { + use std::fmt::Write; + + // Helper struct that compares steps by their full name + struct StepByName<'a>((u32, &'a BuildStep)); + + impl<'a> PartialEq for StepByName<'a> { + fn eq(&self, other: &Self) -> bool { + self.0.1.full_name.eq(&other.0.1.full_name) + } + } + + fn get_steps(step: &BuildStep) -> Vec { + step.linearize_steps().into_iter().map(|v| StepByName(v)).collect() + } + + let steps_before = get_steps(parent); + let steps_after = get_steps(current); + + let mut table = "| Step | Before | After | Change |\n".to_string(); + writeln!(table, "|:-----|-------:|------:|-------:|").unwrap(); + + // Try to match removed, added and same steps using a classic diff algorithm + for result in diff::slice(&steps_before, &steps_after) { + let (step, before, after, change) = match result { + // The step was found both before and after + diff::Result::Both(before, after) => { + let duration_before = before.0.1.duration; + let duration_after = after.0.1.duration; + let pct_change = duration_after.as_secs_f64() / duration_before.as_secs_f64(); + let pct_change = pct_change * 100.0; + // Normalize around 100, to get + for regression and - for improvements + let pct_change = pct_change - 100.0; + ( + before, + format!("{:.2}s", duration_before.as_secs_f64()), + format!("{:.2}s", duration_after.as_secs_f64()), + format!("{pct_change:.1}%"), + ) + } + // The step was only found in the parent, so it was likely removed + diff::Result::Left(removed) => ( + removed, + format!("{:.2}s", removed.0.1.duration.as_secs_f64()), + "".to_string(), + "(removed)".to_string(), + ), + // The step was only found in the current commit, so it was likely added + diff::Result::Right(added) => ( + added, + "".to_string(), + format!("{:.2}s", added.0.1.duration.as_secs_f64()), + "(added)".to_string(), + ), + }; + + let prefix = ".".repeat(step.0.0 as usize); + writeln!( + table, + "| {prefix}{} | {before} | {after} | {change} |", + escape_step_name(step.0.1), + ) + .unwrap(); + } + + table +} + fn record_test_suites(metrics: &JsonRoot) { let suites = metrics::get_test_suites(&metrics); @@ -82,8 +183,7 @@ fn render_table(suites: BTreeMap) -> String { table } -/// Computes a post merge CI analysis report of test differences -/// between the `parent` and `current` commits. +/// Outputs a report of test differences between the `parent` and `current` commits. pub fn output_test_diffs(job_metrics: HashMap) { let aggregated_test_diffs = aggregate_test_diffs(&job_metrics); report_test_diffs(aggregated_test_diffs); diff --git a/src/ci/citool/src/main.rs b/src/ci/citool/src/main.rs index 5a84ecb5e47e5..5f5c50dc43a0e 100644 --- a/src/ci/citool/src/main.rs +++ b/src/ci/citool/src/main.rs @@ -144,31 +144,35 @@ fn postprocess_metrics( job_name: Option, ) -> anyhow::Result<()> { let metrics = load_metrics(&metrics_path)?; - output_bootstrap_stats(&metrics); - let (Some(parent), Some(job_name)) = (parent, job_name) else { - return Ok(()); - }; - - // This command is executed also on PR builds, which might not have parent metrics - // available, because some PR jobs don't run on auto builds, and PR jobs do not upload metrics - // due to missing permissions. - // To avoid having to detect if this is a PR job, and to avoid having failed steps in PR jobs, - // we simply print an error if the parent metrics were not found, but otherwise exit - // successfully. - match download_job_metrics(&job_name, &parent).context("cannot download parent metrics") { - Ok(parent_metrics) => { - let job_metrics = HashMap::from([( - job_name, - JobMetrics { parent: Some(parent_metrics), current: metrics }, - )]); - output_test_diffs(job_metrics); - } - Err(error) => { - eprintln!("Metrics for job `{job_name}` and commit `{parent}` not found: {error:?}"); + if let (Some(parent), Some(job_name)) = (parent, job_name) { + // This command is executed also on PR builds, which might not have parent metrics + // available, because some PR jobs don't run on auto builds, and PR jobs do not upload metrics + // due to missing permissions. + // To avoid having to detect if this is a PR job, and to avoid having failed steps in PR jobs, + // we simply print an error if the parent metrics were not found, but otherwise exit + // successfully. + match download_job_metrics(&job_name, &parent).context("cannot download parent metrics") { + Ok(parent_metrics) => { + output_bootstrap_stats(&metrics, Some(&parent_metrics)); + + let job_metrics = HashMap::from([( + job_name, + JobMetrics { parent: Some(parent_metrics), current: metrics }, + )]); + output_test_diffs(job_metrics); + return Ok(()); + } + Err(error) => { + eprintln!( + "Metrics for job `{job_name}` and commit `{parent}` not found: {error:?}" + ); + } } } + output_bootstrap_stats(&metrics, None); + Ok(()) }