Skip to content

Add bootstrap step diff to CI job analysis #138930

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 38 additions & 23 deletions src/build_helper/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,26 +109,31 @@ pub struct BuildStep {
pub r#type: String,
pub children: Vec<BuildStep>,
pub duration: Duration,
// Full name of the step, including all parent names
pub full_name: String,
}

impl BuildStep {
/// Create a `BuildStep` representing a single invocation of bootstrap.
/// The most important thing is that the build step aggregates the
/// durations of all children, so that it can be easily accessed.
pub fn from_invocation(invocation: &JsonInvocation) -> Self {
fn parse(node: &JsonNode) -> Option<BuildStep> {
fn parse(node: &JsonNode, parent_name: &str) -> Option<BuildStep> {
match node {
JsonNode::RustbuildStep {
type_: kind,
children,
duration_excluding_children_sec,
..
} => {
let children: Vec<_> = children.into_iter().filter_map(parse).collect();
let full_name = format!("{parent_name}-{kind}");
let children: Vec<_> =
children.into_iter().filter_map(|s| parse(s, &full_name)).collect();
let children_duration = children.iter().map(|c| c.duration).sum::<Duration>();
Some(BuildStep {
r#type: kind.to_string(),
children,
full_name,
duration: children_duration
+ Duration::from_secs_f64(*duration_excluding_children_sec),
})
Expand All @@ -138,8 +143,13 @@ impl BuildStep {
}

let duration = Duration::from_secs_f64(invocation.duration_including_children_sec);
let children: Vec<_> = invocation.children.iter().filter_map(parse).collect();
Self { r#type: "total".to_string(), children, duration }

// The root "total" step is kind of a virtual step that encompasses all other steps,
// but it is not a real parent of the other steps.
// We thus start the parent name hierarchy here and use an empty string
// as the parent name of the top-level steps.
let children: Vec<_> = invocation.children.iter().filter_map(|s| parse(s, "")).collect();
Self { r#type: "total".to_string(), children, duration, full_name: "total".to_string() }
}

pub fn find_all_by_type(&self, r#type: &str) -> Vec<&Self> {
Expand All @@ -156,33 +166,38 @@ impl BuildStep {
child.find_by_type(r#type, result);
}
}
}

/// Writes build steps into a nice indented table.
pub fn format_build_steps(root: &BuildStep) -> String {
use std::fmt::Write;

let mut substeps: Vec<(u32, &BuildStep)> = Vec::new();
/// Returns a Vec with all substeps, ordered by their hierarchical order.
/// The first element of the tuple is the depth of a given step.
pub fn linearize_steps(&self) -> Vec<(u32, &BuildStep)> {
let mut substeps: Vec<(u32, &BuildStep)> = Vec::new();

fn visit<'a>(step: &'a BuildStep, level: u32, substeps: &mut Vec<(u32, &'a BuildStep)>) {
substeps.push((level, step));
for child in &step.children {
visit(child, level + 1, substeps);
fn visit<'a>(step: &'a BuildStep, level: u32, substeps: &mut Vec<(u32, &'a BuildStep)>) {
substeps.push((level, step));
for child in &step.children {
visit(child, level + 1, substeps);
}
}

visit(self, 0, &mut substeps);
substeps
}
}

visit(root, 0, &mut substeps);
/// Writes build steps into a nice indented table.
pub fn format_build_steps(root: &BuildStep) -> String {
use std::fmt::Write;

let mut output = String::new();
for (level, step) in substeps {
let label = format!(
"{}{}",
".".repeat(level as usize),
// Bootstrap steps can be generic and thus contain angle brackets (<...>).
// However, Markdown interprets these as HTML, so we need to escap ethem.
step.r#type.replace('<', "&lt;").replace('>', "&gt;")
);
for (level, step) in root.linearize_steps() {
let label = format!("{}{}", ".".repeat(level as usize), escape_step_name(step));
writeln!(output, "{label:.<65}{:>8.2}s", step.duration.as_secs_f64()).unwrap();
}
output
}

/// Bootstrap steps can be generic and thus contain angle brackets (<...>).
/// However, Markdown interprets these as HTML, so we need to escap ethem.
pub fn escape_step_name(step: &BuildStep) -> String {
step.r#type.replace('<', "&lt;").replace('>', "&gt;")
}
7 changes: 7 additions & 0 deletions src/ci/citool/Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ dependencies = [
"build_helper",
"clap",
"csv",
"diff",
"glob-match",
"insta",
"serde",
Expand Down Expand Up @@ -241,6 +242,12 @@ dependencies = [
"powerfmt",
]

[[package]]
name = "diff"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8"

[[package]]
name = "displaydoc"
version = "0.2.5"
Expand Down
1 change: 1 addition & 0 deletions src/ci/citool/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ edition = "2021"
anyhow = "1"
clap = { version = "4.5", features = ["derive"] }
csv = "1"
diff = "0.1"
glob-match = "0.2"
serde = { version = "1", features = ["derive"] }
serde_yaml = "0.9"
Expand Down
114 changes: 107 additions & 7 deletions src/ci/citool/src/analysis.rs
Original file line number Diff line number Diff line change
@@ -1,33 +1,134 @@
use std::collections::{BTreeMap, HashMap, HashSet};
use std::fmt::Debug;

use build_helper::metrics::{
BuildStep, JsonRoot, TestOutcome, TestSuite, TestSuiteMetadata, format_build_steps,
BuildStep, JsonRoot, TestOutcome, TestSuite, TestSuiteMetadata, escape_step_name,
format_build_steps,
};

use crate::metrics;
use crate::metrics::{JobMetrics, JobName, get_test_suites};
use crate::utils::{output_details, pluralize};

pub fn output_bootstrap_stats(metrics: &JsonRoot) {
/// Outputs durations of individual bootstrap steps from the gathered bootstrap invocations,
/// and also a table with summarized information about executed tests.
pub fn output_bootstrap_stats(metrics: &JsonRoot, parent_metrics: Option<&JsonRoot>) {
if !metrics.invocations.is_empty() {
println!("# Bootstrap steps");
record_bootstrap_step_durations(&metrics);
record_bootstrap_step_durations(&metrics, parent_metrics);
record_test_suites(&metrics);
}
}

fn record_bootstrap_step_durations(metrics: &JsonRoot) {
fn record_bootstrap_step_durations(metrics: &JsonRoot, parent_metrics: Option<&JsonRoot>) {
let parent_steps: HashMap<String, BuildStep> = parent_metrics
.map(|metrics| {
metrics
.invocations
.iter()
.map(|invocation| {
(invocation.cmdline.clone(), BuildStep::from_invocation(invocation))
})
.collect()
})
.unwrap_or_default();

for invocation in &metrics.invocations {
let step = BuildStep::from_invocation(invocation);
let table = format_build_steps(&step);
eprintln!("Step `{}`\n{table}\n", invocation.cmdline);
output_details(&invocation.cmdline, || {
output_details(&format!("{} (steps)", invocation.cmdline), || {
println!("<pre><code>{table}</code></pre>");
});

// If there was a parent bootstrap invocation with the same cmdline, diff it
if let Some(parent_step) = parent_steps.get(&invocation.cmdline) {
let table = format_build_step_diffs(&step, parent_step);

let duration_before = parent_step.duration.as_secs();
let duration_after = step.duration.as_secs();
output_details(
&format!("{} (diff) ({duration_before}s -> {duration_after}s)", invocation.cmdline),
|| {
println!("{table}");
},
);
}
}
eprintln!("Recorded {} bootstrap invocation(s)", metrics.invocations.len());
}

/// Creates a table that displays a diff between the durations of steps between
/// two bootstrap invocations.
/// It also shows steps that were missing before/after.
fn format_build_step_diffs(current: &BuildStep, parent: &BuildStep) -> String {
use std::fmt::Write;

// Helper struct that compares steps by their full name
struct StepByName<'a>((u32, &'a BuildStep));

impl<'a> PartialEq for StepByName<'a> {
fn eq(&self, other: &Self) -> bool {
self.0.1.full_name.eq(&other.0.1.full_name)
}
}

fn get_steps(step: &BuildStep) -> Vec<StepByName> {
step.linearize_steps().into_iter().map(|v| StepByName(v)).collect()
}

let steps_before = get_steps(parent);
let steps_after = get_steps(current);

let mut table = "| Step | Before | After | Change |\n".to_string();
writeln!(table, "|:-----|-------:|------:|-------:|").unwrap();

// Try to match removed, added and same steps using a classic diff algorithm
for result in diff::slice(&steps_before, &steps_after) {
let (step, before, after, change) = match result {
// The step was found both before and after
diff::Result::Both(before, after) => {
let duration_before = before.0.1.duration;
let duration_after = after.0.1.duration;
let pct_change = duration_after.as_secs_f64() / duration_before.as_secs_f64();
let pct_change = pct_change * 100.0;
// Normalize around 100, to get + for regression and - for improvements
let pct_change = pct_change - 100.0;
(
before,
format!("{:.2}s", duration_before.as_secs_f64()),
format!("{:.2}s", duration_after.as_secs_f64()),
format!("{pct_change:.1}%"),
)
}
// The step was only found in the parent, so it was likely removed
diff::Result::Left(removed) => (
removed,
format!("{:.2}s", removed.0.1.duration.as_secs_f64()),
"".to_string(),
"(removed)".to_string(),
),
// The step was only found in the current commit, so it was likely added
diff::Result::Right(added) => (
added,
"".to_string(),
format!("{:.2}s", added.0.1.duration.as_secs_f64()),
"(added)".to_string(),
),
};

let prefix = ".".repeat(step.0.0 as usize);
writeln!(
table,
"| {prefix}{} | {before} | {after} | {change} |",
escape_step_name(step.0.1),
)
.unwrap();
}

table
}

fn record_test_suites(metrics: &JsonRoot) {
let suites = metrics::get_test_suites(&metrics);

Expand Down Expand Up @@ -82,8 +183,7 @@ fn render_table(suites: BTreeMap<String, TestSuiteRecord>) -> String {
table
}

/// Computes a post merge CI analysis report of test differences
/// between the `parent` and `current` commits.
/// Outputs a report of test differences between the `parent` and `current` commits.
pub fn output_test_diffs(job_metrics: HashMap<JobName, JobMetrics>) {
let aggregated_test_diffs = aggregate_test_diffs(&job_metrics);
report_test_diffs(aggregated_test_diffs);
Expand Down
46 changes: 25 additions & 21 deletions src/ci/citool/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,31 +144,35 @@ fn postprocess_metrics(
job_name: Option<String>,
) -> anyhow::Result<()> {
let metrics = load_metrics(&metrics_path)?;
output_bootstrap_stats(&metrics);

let (Some(parent), Some(job_name)) = (parent, job_name) else {
return Ok(());
};

// This command is executed also on PR builds, which might not have parent metrics
// available, because some PR jobs don't run on auto builds, and PR jobs do not upload metrics
// due to missing permissions.
// To avoid having to detect if this is a PR job, and to avoid having failed steps in PR jobs,
// we simply print an error if the parent metrics were not found, but otherwise exit
// successfully.
match download_job_metrics(&job_name, &parent).context("cannot download parent metrics") {
Ok(parent_metrics) => {
let job_metrics = HashMap::from([(
job_name,
JobMetrics { parent: Some(parent_metrics), current: metrics },
)]);
output_test_diffs(job_metrics);
}
Err(error) => {
eprintln!("Metrics for job `{job_name}` and commit `{parent}` not found: {error:?}");
if let (Some(parent), Some(job_name)) = (parent, job_name) {
// This command is executed also on PR builds, which might not have parent metrics
// available, because some PR jobs don't run on auto builds, and PR jobs do not upload metrics
// due to missing permissions.
// To avoid having to detect if this is a PR job, and to avoid having failed steps in PR jobs,
// we simply print an error if the parent metrics were not found, but otherwise exit
// successfully.
match download_job_metrics(&job_name, &parent).context("cannot download parent metrics") {
Ok(parent_metrics) => {
output_bootstrap_stats(&metrics, Some(&parent_metrics));

let job_metrics = HashMap::from([(
job_name,
JobMetrics { parent: Some(parent_metrics), current: metrics },
)]);
output_test_diffs(job_metrics);
return Ok(());
}
Err(error) => {
eprintln!(
"Metrics for job `{job_name}` and commit `{parent}` not found: {error:?}"
);
}
}
}

output_bootstrap_stats(&metrics, None);

Ok(())
}

Expand Down
Loading