Skip to content

coverage-dump: Resolve global file IDs to filenames #140251

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
May 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -777,6 +777,7 @@ name = "coverage-dump"
version = "0.1.0"
dependencies = [
"anyhow",
"itertools",
"leb128",
"md-5",
"miniz_oxide 0.7.4",
Expand Down
67 changes: 67 additions & 0 deletions src/bootstrap/src/core/build_steps/check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -527,3 +527,70 @@ tool_check_step!(Bootstrap { path: "src/bootstrap", default: false });
// `run-make-support` will be built as part of suitable run-make compiletest test steps, but support
// check to make it easier to work on.
tool_check_step!(RunMakeSupport { path: "src/tools/run-make-support", default: false });

/// Check step for the `coverage-dump` bootstrap tool. The coverage-dump tool
/// is used internally by coverage tests.
///
/// FIXME(Zalathar): This is temporarily separate from the other tool check
/// steps so that it can use the stage 0 compiler instead of `top_stage`,
/// without introducing conflicts with the stage 0 redesign (#119899).
///
/// After the stage 0 redesign lands, we can look into using the stage 0
/// compiler to check all bootstrap tools (#139170).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(crate) struct CoverageDump;

impl CoverageDump {
/// In-tree path of the coverage-dump tool. It is used as the path that
/// selects this step in `should_run`, and is the path handed to
/// `prepare_tool_cargo` in `run`.
const PATH: &str = "src/tools/coverage-dump";
}

impl Step for CoverageDump {
type Output = ();

/// Most contributors won't care about coverage-dump, so don't make their
/// check builds slower unless they opt in and check it explicitly.
const DEFAULT: bool = false;
const ONLY_HOSTS: bool = true;

fn should_run(run: ShouldRun<'_>) -> ShouldRun<'_> {
run.path(Self::PATH)
}

fn make_run(run: RunConfig<'_>) {
run.builder.ensure(Self {});
}

fn run(self, builder: &Builder<'_>) -> Self::Output {
// Make sure we haven't forgotten any fields, if there are any.
let Self {} = self;
let display_name = "coverage-dump";
let host = builder.config.build;
let target = host;
let mode = Mode::ToolBootstrap;

let compiler = builder.compiler(0, host);
let cargo = prepare_tool_cargo(
builder,
compiler,
mode,
target,
builder.kind,
Self::PATH,
SourceType::InTree,
&[],
);

let stamp = BuildStamp::new(&builder.cargo_out(compiler, mode, target))
.with_prefix(&format!("{display_name}-check"));

let _guard = builder.msg_tool(
builder.kind,
mode,
display_name,
compiler.stage,
&compiler.host,
&target,
);
run_cargo(builder, cargo, builder.config.free_args.clone(), &stamp, vec![], true, false);
}
}
28 changes: 28 additions & 0 deletions src/bootstrap/src/core/build_steps/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -392,3 +392,31 @@ impl Step for CyclicStep {
builder.ensure(CyclicStep { n: self.n.saturating_sub(1) })
}
}

/// Run step behind `./x run coverage-dump`, for invoking the coverage-dump
/// tool by hand.
///
/// Coverage tests use coverage-dump as an internal detail, so this step is
/// only useful when exercising the tool manually.
#[derive(Debug, PartialOrd, Ord, Clone, Hash, PartialEq, Eq)]
pub struct CoverageDump;

impl Step for CoverageDump {
    type Output = ();

    const DEFAULT: bool = false;
    const ONLY_HOSTS: bool = true;

    /// Selects this step when the coverage-dump tool path is requested.
    fn should_run(run: ShouldRun<'_>) -> ShouldRun<'_> {
        run.path("src/tools/coverage-dump")
    }

    /// Schedules the (field-less) step on the builder.
    fn make_run(run: RunConfig<'_>) {
        run.builder.ensure(CoverageDump);
    }

    /// Runs the coverage-dump tool, forwarding the free arguments from the
    /// builder configuration to it.
    fn run(self, builder: &Builder<'_>) {
        builder.tool_cmd(Tool::CoverageDump).args(&builder.config.free_args).run(builder);
    }
}
1 change: 1 addition & 0 deletions src/bootstrap/src/core/build_steps/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ impl Step for CrateBootstrap {
run.path("src/tools/jsondoclint")
.path("src/tools/suggest-tests")
.path("src/tools/replace-version-placeholder")
.path("src/tools/coverage-dump")
// We want `./x test tidy` to _run_ the tidy tool, not its tests.
// So we need a separate alias to test the tidy tool itself.
.alias("tidyselftest")
Expand Down
2 changes: 2 additions & 0 deletions src/bootstrap/src/core/builder/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -961,6 +961,7 @@ impl<'a> Builder<'a> {
check::RunMakeSupport,
check::Compiletest,
check::FeaturesStatusDump,
check::CoverageDump,
),
Kind::Test => describe!(
crate::core::build_steps::toolstate::ToolStateCheck,
Expand Down Expand Up @@ -1114,6 +1115,7 @@ impl<'a> Builder<'a> {
run::UnicodeTableGenerator,
run::FeaturesStatusDump,
run::CyclicStep,
run::CoverageDump,
),
Kind::Setup => {
describe!(setup::Profile, setup::Hook, setup::Link, setup::Editor)
Expand Down
1 change: 1 addition & 0 deletions src/tools/coverage-dump/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ edition = "2021"

[dependencies]
anyhow = "1.0.71"
itertools = "0.12"
leb128 = "0.2.5"
md5 = { package = "md-5" , version = "0.10.5" }
miniz_oxide = "0.7.1"
Expand Down
86 changes: 57 additions & 29 deletions src/tools/coverage-dump/src/covfun.rs
Original file line number Diff line number Diff line change
@@ -1,23 +1,33 @@
use std::collections::HashMap;
use std::fmt::{self, Debug, Write as _};
use std::sync::OnceLock;
use std::sync::LazyLock;

use anyhow::{Context, anyhow};
use anyhow::{Context, anyhow, bail, ensure};
use itertools::Itertools;
use regex::Regex;

use crate::parser::{Parser, unescape_llvm_string_contents};
use crate::covmap::FilenameTables;
use crate::llvm_utils::unescape_llvm_string_contents;
use crate::parser::Parser;

#[cfg(test)]
mod tests;

pub(crate) fn dump_covfun_mappings(
llvm_ir: &str,
filename_tables: &FilenameTables,
function_names: &HashMap<u64, String>,
) -> anyhow::Result<()> {
// Extract function coverage entries from the LLVM IR assembly, and associate
// each entry with its (demangled) name.
let mut covfun_entries = llvm_ir
.lines()
.filter_map(covfun_line_data)
.map(|line_data| (function_names.get(&line_data.name_hash).map(String::as_str), line_data))
.collect::<Vec<_>>();
.filter(|line| is_covfun_line(line))
.map(parse_covfun_line)
.map_ok(|line_data| {
(function_names.get(&line_data.name_hash).map(String::as_str), line_data)
})
.collect::<Result<Vec<_>, _>>()?;
covfun_entries.sort_by(|a, b| {
// Sort entries primarily by name, to help make the order consistent
// across platforms and relatively insensitive to changes.
Expand All @@ -41,8 +51,12 @@ pub(crate) fn dump_covfun_mappings(
println!("Number of files: {num_files}");

for i in 0..num_files {
let global_file_id = parser.read_uleb128_u32()?;
println!("- file {i} => global file {global_file_id}");
let global_file_id = parser.read_uleb128_usize()?;
let &CovfunLineData { filenames_hash, .. } = line_data;
let Some(filename) = filename_tables.lookup(filenames_hash, global_file_id) else {
bail!("couldn't resolve global file: {filenames_hash}, {global_file_id}");
};
println!("- file {i} => {filename}");
}

let num_expressions = parser.read_uleb128_u32()?;
Expand Down Expand Up @@ -107,36 +121,50 @@ pub(crate) fn dump_covfun_mappings(
Ok(())
}

#[derive(Debug, PartialEq, Eq)]
struct CovfunLineData {
name_hash: u64,
is_used: bool,
name_hash: u64,
filenames_hash: u64,
payload: Vec<u8>,
}

/// Checks a line of LLVM IR assembly to see if it contains an `__llvm_covfun`
/// entry, and if so extracts relevant data in a `CovfunLineData`.
fn covfun_line_data(line: &str) -> Option<CovfunLineData> {
let re = {
// We cheat a little bit and match variable names `@__covrec_[HASH]u`
// rather than the section name, because the section name is harder to
// extract and differs across Linux/Windows/macOS. We also extract the
// symbol name hash from the variable name rather than the data, since
// it's easier and both should match.
static RE: OnceLock<Regex> = OnceLock::new();
RE.get_or_init(|| {
Regex::new(
r#"^@__covrec_(?<name_hash>[0-9A-Z]+)(?<is_used>u)? = .*\[[0-9]+ x i8\] c"(?<payload>[^"]*)".*$"#,
)
.unwrap()
})
};
/// Returns `true` when `line` begins with the `@__covrec_` variable-name
/// prefix used to recognise function coverage records.
fn is_covfun_line(line: &str) -> bool {
    line.strip_prefix("@__covrec_").is_some()
}

let captures = re.captures(line)?;
let name_hash = u64::from_str_radix(&captures["name_hash"], 16).unwrap();
/// Given a line of LLVM IR assembly that should contain an `__llvm_covfun`
/// entry, parses it to extract relevant data in a `CovfunLineData`.
///
/// Returns an error if the line does not start with the `@__covrec_` prefix,
/// or if it does not match the record layout described by the regex below.
fn parse_covfun_line(line: &str) -> anyhow::Result<CovfunLineData> {
// Callers filter with `is_covfun_line` first; this guards against misuse.
ensure!(is_covfun_line(line));

// We cheat a little bit and match variable names `@__covrec_[HASH]u`
// rather than the section name, because the section name is harder to
// extract and differs across Linux/Windows/macOS.
const RE_STRING: &str = r#"(?x)^
@__covrec_[0-9A-Z]+(?<is_used>u)?
\ = \ # (trailing space)
.*
<\{
\ i64 \ (?<name_hash> -? [0-9]+),
\ i32 \ -? [0-9]+, # (length of payload; currently unused)
\ i64 \ -? [0-9]+, # (source hash; currently unused)
\ i64 \ (?<filenames_hash> -? [0-9]+),
\ \[ [0-9]+ \ x \ i8 \] \ c"(?<payload>[^"]*)"
\ # (trailing space)
}>
.*$
"#;
// Compiled on first use and reused by every later call.
static RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(RE_STRING).unwrap());

let captures =
RE.captures(line).with_context(|| format!("couldn't parse covfun line: {line:?}"))?;
// The optional `u` suffix on the variable name marks the record as used.
let is_used = captures.name("is_used").is_some();
// The hash fields may be printed with a leading `-`, so they are parsed as
// signed decimal and then reinterpreted as `u64`.
// NOTE(review): the regex guarantees digits but not magnitude; a value
// outside the `i64` range would panic at these `unwrap()` calls.
let name_hash = i64::from_str_radix(&captures["name_hash"], 10).unwrap() as u64;
let filenames_hash = i64::from_str_radix(&captures["filenames_hash"], 10).unwrap() as u64;
let payload = unescape_llvm_string_contents(&captures["payload"]);

Some(CovfunLineData { name_hash, is_used, payload })
Ok(CovfunLineData { is_used, name_hash, filenames_hash, payload })
}

// Extra parser methods only needed when parsing `covfun` payloads.
Expand Down
53 changes: 53 additions & 0 deletions src/tools/coverage-dump/src/covfun/tests.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
use super::{CovfunLineData, parse_covfun_line};

/// LLVM IR integers carry no inherent signedness, and the textual format
/// tends to print them as signed values. This helper reinterprets the bits
/// of an `i64` as a `u64` so test expectations can be written as they appear
/// in the IR.
fn as_u64(x: i64) -> u64 {
    u64::from_ne_bytes(x.to_ne_bytes())
}

/// Parses sample `@__covrec_` lines and checks every extracted field.
#[test]
fn parse_covfun_line_data() {
    // Each sample is `(IR line, name hash, filenames hash, payload bytes)`.
    // Hashes are written as they appear in the IR text (possibly negative).
    // Every sample's symbol name carries the `u` suffix, so all are "used".
    let samples: [(&str, i64, i64, &[u8]); 3] = [
        // Copied from `trivial.ll`:
        (
            r#"@__covrec_49A9BAAE5F896E81u = linkonce_odr hidden constant <{ i64, i32, i64, i64, [9 x i8] }> <{ i64 5307978893922758273, i32 9, i64 445092354169400020, i64 6343436898695299756, [9 x i8] c"\01\01\00\01\01\03\01\00\0D" }>, section "__LLVM_COV,__llvm_covfun", align 8"#,
            5307978893922758273,
            6343436898695299756,
            b"\x01\x01\x00\x01\x01\x03\x01\x00\x0D".as_slice(),
        ),
        // Copied from `on-off-sandwich.ll`:
        (
            r#"@__covrec_D0CE53C5E64F319Au = linkonce_odr hidden constant <{ i64, i32, i64, i64, [14 x i8] }> <{ i64 -3400688559180533350, i32 14, i64 7307957714577672185, i64 892196767019953100, [14 x i8] c"\01\01\00\02\01\10\05\02\10\01\07\05\00\06" }>, section "__LLVM_COV,__llvm_covfun", align 8"#,
            -3400688559180533350,
            892196767019953100,
            b"\x01\x01\x00\x02\x01\x10\x05\x02\x10\x01\x07\x05\x00\x06".as_slice(),
        ),
        // Copied from `no-core.ll`:
        (
            r#"@__covrec_F8016FC82D46106u = linkonce_odr hidden constant <{ i64, i32, i64, i64, [9 x i8] }> <{ i64 1116917981370409222, i32 9, i64 -8857254680411629915, i64 -3625186110715410276, [9 x i8] c"\01\01\00\01\01\0C\01\00\0D" }>, section "__LLVM_COV,__llvm_covfun", align 8"#,
            1116917981370409222,
            -3625186110715410276,
            b"\x01\x01\x00\x01\x01\x0C\x01\x00\x0D".as_slice(),
        ),
    ];

    for (line, name_hash, filenames_hash, payload) in samples {
        println!("- {line}");
        let expected = CovfunLineData {
            is_used: true,
            name_hash: as_u64(name_hash),
            filenames_hash: as_u64(filenames_hash),
            payload: payload.to_vec(),
        };
        // Stringify the error so a failed parse shows up in the assertion diff.
        let parsed = parse_covfun_line(line).map_err(|e| e.to_string());
        assert_eq!(parsed.as_ref(), Ok(&expected));
    }
}
75 changes: 75 additions & 0 deletions src/tools/coverage-dump/src/covmap.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
use std::collections::HashMap;
use std::sync::LazyLock;

use anyhow::{Context, ensure};
use regex::Regex;

use crate::llvm_utils::{truncated_md5, unescape_llvm_string_contents};
use crate::parser::Parser;

/// Filename tables keyed by filenames hash; a table's position is the
/// global file ID of the filename stored there.
#[derive(Debug, Default)]
pub(crate) struct FilenameTables {
    map: HashMap<u64, Vec<String>>,
}

impl FilenameTables {
    /// Resolves `global_file_id` within the table registered under
    /// `filenames_hash`.
    ///
    /// Returns `None` if no table has that hash, or if the ID is past the
    /// end of the table.
    pub(crate) fn lookup(&self, filenames_hash: u64, global_file_id: usize) -> Option<&str> {
        self.map
            .get(&filenames_hash)
            .and_then(|table| table.get(global_file_id))
            .map(String::as_str)
    }
}

/// Data extracted from a single `@__llvm_coverage_mapping` line of LLVM IR.
struct CovmapLineData {
/// Unescaped bytes of the `c"..."` string constant captured from the line.
payload: Vec<u8>,
}

/// Builds the filename tables for every `@__llvm_coverage_mapping` line in
/// `llvm_ir`.
///
/// Each payload holds a filename count followed by a chunk that
/// `read_chunk_to_uncompressed_bytes` expands into length-prefixed filenames.
/// The resulting table is stored under the truncated MD5 of the payload.
///
/// Returns an error if a line cannot be parsed, the payload has trailing
/// bytes after the chunk, or a filename is not valid UTF-8.
pub(crate) fn make_filename_tables(llvm_ir: &str) -> anyhow::Result<FilenameTables> {
    let mut map = HashMap::default();

    let covmap_lines = llvm_ir.lines().filter(|line| is_covmap_line(line));
    for covmap_line in covmap_lines {
        let CovmapLineData { payload } = parse_covmap_line(covmap_line)?;

        // Outer layer: the filename count, then the chunk, then nothing else.
        let mut outer = Parser::new(&payload);
        let n_filenames = outer.read_uleb128_usize()?;
        let uncompressed_bytes = outer.read_chunk_to_uncompressed_bytes()?;
        outer.ensure_empty()?;

        // Inner layer: `n_filenames` entries of (ULEB128 length, bytes).
        let mut inner = Parser::new(&uncompressed_bytes);
        let filenames_table = (0..n_filenames)
            .map(|_| -> anyhow::Result<String> {
                let len = inner.read_uleb128_usize()?;
                let bytes = inner.read_n_bytes(len)?;
                Ok(str::from_utf8(bytes)?.to_owned())
            })
            .collect::<anyhow::Result<Vec<_>>>()?;

        map.insert(truncated_md5(&payload), filenames_table);
    }

    Ok(FilenameTables { map })
}

/// Returns `true` when `line` begins with `@__llvm_coverage_mapping`
/// followed by a space.
fn is_covmap_line(line: &str) -> bool {
    line.strip_prefix("@__llvm_coverage_mapping ").is_some()
}

/// Extracts the unescaped `c"..."` payload from a line of LLVM IR that
/// defines `@__llvm_coverage_mapping`.
///
/// Returns an error if the line lacks the expected prefix or does not match
/// the pattern below.
fn parse_covmap_line(line: &str) -> anyhow::Result<CovmapLineData> {
    ensure!(is_covmap_line(line));

    // Compiled on first use and reused by every later call. The pattern uses
    // verbose (`x`) mode, so literal spaces are written as `\ `.
    static RE: LazyLock<Regex> = LazyLock::new(|| {
        const RE_STRING: &str = r#"(?x)^
@__llvm_coverage_mapping \ =
.*
\[ [0-9]+ \ x \ i8 \] \ c"(?<payload>[^"]*)"
.*$
"#;
        Regex::new(RE_STRING).unwrap()
    });

    let payload = RE
        .captures(line)
        .with_context(|| format!("couldn't parse covmap line: {line:?}"))
        .map(|captures| unescape_llvm_string_contents(&captures["payload"]))?;

    Ok(CovmapLineData { payload })
}
Loading
Loading