Skip to content

Commit fc0c84f

Browse files
committed
coverage: Build the CGU's global file table as late as possible
1 parent 17ffbc8 commit fc0c84f

File tree

2 files changed

+110
-80
lines changed

2 files changed

+110
-80
lines changed

compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs

+84-64
Original file line numberDiff line numberDiff line change
@@ -56,31 +56,26 @@ pub(crate) fn finalize(cx: &CodegenCx<'_, '_>) {
5656
None => return,
5757
};
5858

59-
// The order of entries in this global file table needs to be deterministic,
60-
// and ideally should also be independent of the details of stable-hashing,
61-
// because coverage tests snapshots (`.cov-map`) can observe the order and
62-
// would need to be re-blessed if it changes. As long as those requirements
63-
// are satisfied, the order can be arbitrary.
64-
let mut global_file_table = GlobalFileTable::new();
65-
6659
let mut covfun_records = instances_used
6760
.iter()
6861
.copied()
6962
// Sort by symbol name, so that the global file table is built in an
7063
// order that doesn't depend on the stable-hash-based order in which
7164
// instances were visited during codegen.
7265
.sorted_by_cached_key(|&instance| tcx.symbol_name(instance).name)
73-
.filter_map(|instance| prepare_covfun_record(tcx, &mut global_file_table, instance, true))
66+
.filter_map(|instance| prepare_covfun_record(tcx, instance, true))
7467
.collect::<Vec<_>>();
7568

7669
// In a single designated CGU, also prepare covfun records for functions
7770
// in this crate that were instrumented for coverage, but are unused.
7871
if cx.codegen_unit.is_code_coverage_dead_code_cgu() {
7972
let mut unused_instances = gather_unused_function_instances(cx);
80-
// Sort the unused instances by symbol name, for the same reason as the used ones.
73+
// Sort the unused instances by symbol name, so that their order doesn't
74+
// depend on the stable-hash-sensitive order that they were discovered in.
8175
unused_instances.sort_by_cached_key(|&instance| tcx.symbol_name(instance).name);
8276
covfun_records.extend(unused_instances.into_iter().filter_map(|instance| {
83-
prepare_covfun_record(tcx, &mut global_file_table, instance, false)
77+
// Prepare a suitable covfun record, with all counters forced to zero.
78+
prepare_covfun_record(tcx, instance, false)
8479
}));
8580
}
8681

@@ -93,19 +88,17 @@ pub(crate) fn finalize(cx: &CodegenCx<'_, '_>) {
9388
return;
9489
}
9590

96-
// Encode all filenames referenced by coverage mappings in this CGU.
97-
let filenames_buffer = global_file_table.make_filenames_buffer(tcx);
98-
// The `llvm-cov` tool uses this hash to associate each covfun record with
99-
// its corresponding filenames table, since the final binary will typically
100-
// contain multiple covmap records from different compilation units.
101-
let filenames_hash = llvm_cov::hash_bytes(&filenames_buffer);
91+
// Prepare the global file table for this CGU, containing all paths needed
92+
// by one or more covfun records.
93+
let global_file_table =
94+
GlobalFileTable::build(tcx, covfun_records.iter().flat_map(|c| c.all_source_files()));
10295

10396
let mut unused_function_names = vec![];
10497

10598
for covfun in &covfun_records {
10699
unused_function_names.extend(covfun.mangled_function_name_if_unused());
107100

108-
covfun::generate_covfun_record(cx, filenames_hash, covfun)
101+
covfun::generate_covfun_record(cx, &global_file_table, covfun)
109102
}
110103

111104
// For unused functions, we need to take their mangled names and store them
@@ -130,54 +123,76 @@ pub(crate) fn finalize(cx: &CodegenCx<'_, '_>) {
130123
// Generate the coverage map header, which contains the filenames used by
131124
// this CGU's coverage mappings, and store it in a well-known global.
132125
// (This is skipped if we returned early due to having no covfun records.)
133-
generate_covmap_record(cx, covmap_version, &filenames_buffer);
126+
generate_covmap_record(cx, covmap_version, &global_file_table.filenames_buffer);
134127
}
135128

136-
/// Maps "global" (per-CGU) file ID numbers to their underlying source files.
129+
/// Maps "global" (per-CGU) file ID numbers to their underlying source file paths.
130+
#[derive(Debug)]
137131
struct GlobalFileTable {
138132
/// This "raw" table doesn't include the working dir, so a file's
139133
/// global ID is its index in this set **plus one**.
140-
raw_file_table: FxIndexMap<StableSourceFileId, Arc<SourceFile>>,
134+
raw_file_table: FxIndexMap<StableSourceFileId, String>,
135+
136+
/// The file table in encoded form (possibly compressed), which can be
137+
/// included directly in this CGU's `__llvm_covmap` record.
138+
filenames_buffer: Vec<u8>,
139+
140+
/// Truncated hash of the bytes in `filenames_buffer`.
141+
///
142+
/// The `llvm-cov` tool uses this hash to associate each covfun record with
143+
/// its corresponding filenames table, since the final binary will typically
144+
/// contain multiple covmap records from different compilation units.
145+
filenames_hash: u64,
141146
}
142147

143148
impl GlobalFileTable {
144-
fn new() -> Self {
145-
Self { raw_file_table: FxIndexMap::default() }
146-
}
149+
/// Builds a "global file table" for this CGU, mapping numeric IDs to
150+
/// path strings.
151+
fn build<'a>(tcx: TyCtxt<'_>, all_files: impl Iterator<Item = &'a SourceFile>) -> Self {
152+
let mut raw_file_table = FxIndexMap::default();
153+
154+
for file in all_files {
155+
raw_file_table.entry(file.stable_id).or_insert_with(|| {
156+
file.name
157+
.for_scope(tcx.sess, RemapPathScopeComponents::MACRO)
158+
.to_string_lossy()
159+
.into_owned()
160+
});
161+
}
147162

148-
fn global_file_id_for_file(&mut self, file: &Arc<SourceFile>) -> GlobalFileId {
149-
// Ensure the given file has a table entry, and get its index.
150-
let entry = self.raw_file_table.entry(file.stable_id);
151-
let raw_id = entry.index();
152-
entry.or_insert_with(|| Arc::clone(file));
163+
// FIXME(Zalathar): Consider sorting the file table here, but maybe
164+
// only after adding filename support to coverage-dump, so that the
165+
// table order isn't directly visible in `.coverage-map` snapshots.
153166

154-
// The raw file table doesn't include an entry for the working dir
155-
// (which has ID 0), so add 1 to get the correct ID.
156-
GlobalFileId::from_usize(raw_id + 1)
157-
}
167+
let mut table = Vec::with_capacity(raw_file_table.len() + 1);
158168

159-
fn make_filenames_buffer(&self, tcx: TyCtxt<'_>) -> Vec<u8> {
160-
let mut table = Vec::with_capacity(self.raw_file_table.len() + 1);
161-
162-
// LLVM Coverage Mapping Format version 6 (zero-based encoded as 5)
163-
// requires setting the first filename to the compilation directory.
164-
// Since rustc generates coverage maps with relative paths, the
165-
// compilation directory can be combined with the relative paths
166-
// to get absolute paths, if needed.
167-
table.push(
168-
tcx.sess
169-
.opts
170-
.working_dir
171-
.for_scope(tcx.sess, RemapPathScopeComponents::MACRO)
172-
.to_string_lossy(),
173-
);
169+
// Since version 6 of the LLVM coverage mapping format, the first entry
170+
// in the global file table is treated as a base directory, used to
171+
// resolve any other entries that are stored as relative paths.
172+
let base_dir = tcx
173+
.sess
174+
.opts
175+
.working_dir
176+
.for_scope(tcx.sess, RemapPathScopeComponents::MACRO)
177+
.to_string_lossy();
178+
table.push(base_dir.as_ref());
174179

175180
// Add the regular entries after the base directory.
176-
table.extend(self.raw_file_table.values().map(|file| {
177-
file.name.for_scope(tcx.sess, RemapPathScopeComponents::MACRO).to_string_lossy()
178-
}));
181+
table.extend(raw_file_table.values().map(|name| name.as_str()));
182+
183+
// Encode the file table into a buffer, and get the hash of its encoded
184+
// bytes, so that we can embed that hash in `__llvm_covfun` records.
185+
let filenames_buffer = llvm_cov::write_filenames_to_buffer(&table);
186+
let filenames_hash = llvm_cov::hash_bytes(&filenames_buffer);
187+
188+
Self { raw_file_table, filenames_buffer, filenames_hash }
189+
}
179190

180-
llvm_cov::write_filenames_to_buffer(&table)
191+
fn get_existing_id(&self, file: &SourceFile) -> Option<GlobalFileId> {
192+
let raw_id = self.raw_file_table.get_index_of(&file.stable_id)?;
193+
// The raw file table doesn't include an entry for the base dir
194+
// (which has ID 0), so add 1 to get the correct ID.
195+
Some(GlobalFileId::from_usize(raw_id + 1))
181196
}
182197
}
183198

@@ -193,26 +208,31 @@ rustc_index::newtype_index! {
193208
struct LocalFileId {}
194209
}
195210

196-
/// Holds a mapping from "local" (per-function) file IDs to "global" (per-CGU)
197-
/// file IDs.
211+
/// Holds a mapping from "local" (per-function) file IDs to their corresponding
212+
/// source files.
198213
#[derive(Debug, Default)]
199214
struct VirtualFileMapping {
200-
local_to_global: IndexVec<LocalFileId, GlobalFileId>,
201-
global_to_local: FxIndexMap<GlobalFileId, LocalFileId>,
215+
local_file_table: IndexVec<LocalFileId, Arc<SourceFile>>,
202216
}
203217

204218
impl VirtualFileMapping {
205-
fn local_id_for_global(&mut self, global_file_id: GlobalFileId) -> LocalFileId {
206-
*self
207-
.global_to_local
208-
.entry(global_file_id)
209-
.or_insert_with(|| self.local_to_global.push(global_file_id))
219+
fn push_file(&mut self, source_file: &Arc<SourceFile>) -> LocalFileId {
220+
self.local_file_table.push(Arc::clone(source_file))
210221
}
211222

212-
fn to_vec(&self) -> Vec<u32> {
213-
// This clone could be avoided by transmuting `&[GlobalFileId]` to `&[u32]`,
214-
// but it isn't hot or expensive enough to justify the extra unsafety.
215-
self.local_to_global.iter().map(|&global| GlobalFileId::as_u32(global)).collect()
223+
/// Resolves all of the filenames in this local file mapping to a list of
224+
/// global file IDs in its CGU, for inclusion in this function's
225+
/// `__llvm_covfun` record.
226+
///
227+
/// The global file IDs are returned as `u32` to make FFI easier.
228+
fn resolve_all(&self, global_file_table: &GlobalFileTable) -> Option<Vec<u32>> {
229+
self.local_file_table
230+
.iter()
231+
.map(|file| try {
232+
let id = global_file_table.get_existing_id(file)?;
233+
GlobalFileId::as_u32(id)
234+
})
235+
.collect::<Option<Vec<_>>>()
216236
}
217237
}
218238

compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs

+26-16
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
//! [^win]: On Windows the section name is `.lcovfun`.
66
77
use std::ffi::CString;
8+
use std::sync::Arc;
89

910
use rustc_abi::Align;
1011
use rustc_codegen_ssa::traits::{
@@ -15,7 +16,7 @@ use rustc_middle::mir::coverage::{
1516
MappingKind, Op,
1617
};
1718
use rustc_middle::ty::{Instance, TyCtxt};
18-
use rustc_span::Span;
19+
use rustc_span::{SourceFile, Span};
1920
use rustc_target::spec::HasTargetSpec;
2021
use tracing::debug;
2122

@@ -45,9 +46,16 @@ impl<'tcx> CovfunRecord<'tcx> {
4546
}
4647
}
4748

49+
impl<'tcx> CovfunRecord<'tcx> {
50+
/// Iterator that yields all source files referred to by this function's
51+
/// coverage mappings. Used to build the global file table for the CGU.
52+
pub(crate) fn all_source_files(&self) -> impl Iterator<Item = &SourceFile> {
53+
self.virtual_file_mapping.local_file_table.iter().map(Arc::as_ref)
54+
}
55+
}
56+
4857
pub(crate) fn prepare_covfun_record<'tcx>(
4958
tcx: TyCtxt<'tcx>,
50-
global_file_table: &mut GlobalFileTable,
5159
instance: Instance<'tcx>,
5260
is_used: bool,
5361
) -> Option<CovfunRecord<'tcx>> {
@@ -65,7 +73,7 @@ pub(crate) fn prepare_covfun_record<'tcx>(
6573
regions: ffi::Regions::default(),
6674
};
6775

68-
fill_region_tables(tcx, global_file_table, fn_cov_info, ids_info, &mut covfun);
76+
fill_region_tables(tcx, fn_cov_info, ids_info, &mut covfun);
6977

7078
if covfun.regions.has_no_regions() {
7179
debug!(?covfun, "function has no mappings to embed; skipping");
@@ -100,7 +108,6 @@ fn prepare_expressions(ids_info: &CoverageIdsInfo) -> Vec<ffi::CounterExpression
100108
/// Populates the mapping region tables in the current function's covfun record.
101109
fn fill_region_tables<'tcx>(
102110
tcx: TyCtxt<'tcx>,
103-
global_file_table: &mut GlobalFileTable,
104111
fn_cov_info: &'tcx FunctionCoverageInfo,
105112
ids_info: &'tcx CoverageIdsInfo,
106113
covfun: &mut CovfunRecord<'tcx>,
@@ -114,11 +121,7 @@ fn fill_region_tables<'tcx>(
114121
};
115122
let source_file = source_map.lookup_source_file(first_span.lo());
116123

117-
// Look up the global file ID for that file.
118-
let global_file_id = global_file_table.global_file_id_for_file(&source_file);
119-
120-
// Associate that global file ID with a local file ID for this function.
121-
let local_file_id = covfun.virtual_file_mapping.local_id_for_global(global_file_id);
124+
let local_file_id = covfun.virtual_file_mapping.push_file(&source_file);
122125

123126
// In rare cases, _all_ of a function's spans are discarded, and coverage
124127
// codegen needs to handle that gracefully to avoid #133606.
@@ -187,7 +190,7 @@ fn fill_region_tables<'tcx>(
187190
/// as a global variable in the `__llvm_covfun` section.
188191
pub(crate) fn generate_covfun_record<'tcx>(
189192
cx: &CodegenCx<'_, 'tcx>,
190-
filenames_hash: u64,
193+
global_file_table: &GlobalFileTable,
191194
covfun: &CovfunRecord<'tcx>,
192195
) {
193196
let &CovfunRecord {
@@ -199,12 +202,19 @@ pub(crate) fn generate_covfun_record<'tcx>(
199202
ref regions,
200203
} = covfun;
201204

205+
let Some(local_file_table) = virtual_file_mapping.resolve_all(global_file_table) else {
206+
debug_assert!(
207+
false,
208+
"all local files should be present in the global file table: \
209+
global_file_table = {global_file_table:?}, \
210+
virtual_file_mapping = {virtual_file_mapping:?}"
211+
);
212+
return;
213+
};
214+
202215
// Encode the function's coverage mappings into a buffer.
203-
let coverage_mapping_buffer = llvm_cov::write_function_mappings_to_buffer(
204-
&virtual_file_mapping.to_vec(),
205-
expressions,
206-
regions,
207-
);
216+
let coverage_mapping_buffer =
217+
llvm_cov::write_function_mappings_to_buffer(&local_file_table, expressions, regions);
208218

209219
// A covfun record consists of four target-endian integers, followed by the
210220
// encoded mapping data in bytes. Note that the length field is 32 bits.
@@ -217,7 +227,7 @@ pub(crate) fn generate_covfun_record<'tcx>(
217227
cx.const_u64(func_name_hash),
218228
cx.const_u32(coverage_mapping_buffer.len() as u32),
219229
cx.const_u64(source_hash),
220-
cx.const_u64(filenames_hash),
230+
cx.const_u64(global_file_table.filenames_hash),
221231
cx.const_bytes(&coverage_mapping_buffer),
222232
],
223233
// This struct needs to be packed, so that the 32-bit length field

0 commit comments

Comments
 (0)