Skip to content

Commit 6d34a2b

Browse files
committed
Introduce -Zsplit-metadata option
This will split the crate metadata out of library files. Instead, only the svh is preserved to allow for loading the right rmeta file. This significantly reduces library size. In addition, it allows for cheaper checks of whether different library files are the same crate.
1 parent 8656e1b commit 6d34a2b

File tree

8 files changed

+97
-27
lines changed

8 files changed

+97
-27
lines changed

compiler/rustc_codegen_ssa/src/back/link.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,7 @@ fn link_rlib<'a>(
298298
let (metadata, metadata_position) = create_wrapper_file(
299299
sess,
300300
".rmeta".to_string(),
301-
codegen_results.metadata.raw_data(),
301+
codegen_results.metadata.maybe_reference(),
302302
);
303303
let metadata = emit_wrapper_file(sess, &metadata, tmpdir, METADATA_FILENAME);
304304
match metadata_position {

compiler/rustc_codegen_ssa/src/back/metadata.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -578,8 +578,8 @@ pub fn create_compressed_metadata_file(
578578
symbol_name: &str,
579579
) -> Vec<u8> {
580580
let mut packed_metadata = rustc_metadata::METADATA_HEADER.to_vec();
581-
packed_metadata.write_all(&(metadata.raw_data().len() as u64).to_le_bytes()).unwrap();
582-
packed_metadata.extend(metadata.raw_data());
581+
packed_metadata.write_all(&(metadata.maybe_reference().len() as u64).to_le_bytes()).unwrap();
582+
packed_metadata.extend(metadata.maybe_reference());
583583

584584
let Some(mut file) = create_object_file(sess) else {
585585
if sess.target.is_like_wasm {

compiler/rustc_interface/src/tests.rs

+1
Original file line numberDiff line numberDiff line change
@@ -854,6 +854,7 @@ fn test_unstable_options_tracking_hash() {
854854
tracked!(simulate_remapped_rust_src_base, Some(PathBuf::from("/rustc/abc")));
855855
tracked!(small_data_threshold, Some(16));
856856
tracked!(split_lto_unit, Some(true));
857+
tracked!(split_metadata, true);
857858
tracked!(src_hash_algorithm, Some(SourceFileHashAlgorithm::Sha1));
858859
tracked!(stack_protector, StackProtector::All);
859860
tracked!(teach, true);

compiler/rustc_metadata/src/fs.rs

+10-5
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,8 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {
5050
.tempdir_in(out_filename.parent().unwrap_or_else(|| Path::new("")))
5151
.unwrap_or_else(|err| tcx.dcx().emit_fatal(FailedCreateTempdir { err }));
5252
let metadata_tmpdir = MaybeTempDir::new(metadata_tmpdir, tcx.sess.opts.cg.save_temps);
53-
let metadata_filename = metadata_tmpdir.as_ref().join(METADATA_FILENAME);
53+
let metadata_filename = metadata_tmpdir.as_ref().join("full.rmeta");
54+
let metadata_reference_filename = metadata_tmpdir.as_ref().join("ref.rmeta");
5455

5556
// Always create a file at `metadata_filename`, even if we have nothing to write to it.
5657
// This simplifies the creation of the output `out_filename` when requested.
@@ -60,9 +61,12 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {
6061
std::fs::File::create(&metadata_filename).unwrap_or_else(|err| {
6162
tcx.dcx().emit_fatal(FailedCreateFile { filename: &metadata_filename, err });
6263
});
64+
std::fs::File::create(&metadata_reference_filename).unwrap_or_else(|err| {
65+
tcx.dcx().emit_fatal(FailedCreateFile { filename: &metadata_filename, err });
66+
});
6367
}
6468
MetadataKind::Uncompressed | MetadataKind::Compressed => {
65-
encode_metadata(tcx, &metadata_filename);
69+
encode_metadata(tcx, &metadata_filename, &metadata_reference_filename)
6670
}
6771
};
6872

@@ -100,9 +104,10 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {
100104

101105
// Load metadata back to memory: codegen may need to include it in object files.
102106
let metadata =
103-
EncodedMetadata::from_path(metadata_filename, metadata_tmpdir).unwrap_or_else(|err| {
104-
tcx.dcx().emit_fatal(FailedCreateEncodedMetadata { err });
105-
});
107+
EncodedMetadata::from_path(metadata_filename, metadata_reference_filename, metadata_tmpdir)
108+
.unwrap_or_else(|err| {
109+
tcx.dcx().emit_fatal(FailedCreateEncodedMetadata { err });
110+
});
106111

107112
let need_metadata_module = metadata_kind == MetadataKind::Compressed;
108113

compiler/rustc_metadata/src/locator.rs

+5
Original file line numberDiff line numberDiff line change
@@ -580,6 +580,11 @@ impl<'a> CrateLocator<'a> {
580580
) {
581581
Ok(blob) => {
582582
if let Some(h) = self.crate_matches(&blob, &lib) {
583+
if blob.get_header().is_reference {
584+
if slot.is_none() {
585+
todo!("return error");
586+
}
587+
}
583588
(h, blob)
584589
} else {
585590
info!("metadata mismatch");

compiler/rustc_metadata/src/rmeta/encoder.rs

+72-19
Original file line numberDiff line numberDiff line change
@@ -701,6 +701,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
701701
triple: tcx.sess.opts.target_triple.clone(),
702702
hash: tcx.crate_hash(LOCAL_CRATE),
703703
is_proc_macro_crate: proc_macro_data.is_some(),
704+
is_reference: false,
704705
},
705706
extra_filename: tcx.sess.opts.cg.extra_filename.clone(),
706707
stable_crate_id: tcx.def_path_hash(LOCAL_CRATE.as_def_id()).stable_crate_id(),
@@ -2226,42 +2227,61 @@ fn prefetch_mir(tcx: TyCtxt<'_>) {
22262227
// generated regardless of trailing bytes that end up in it.
22272228

22282229
pub struct EncodedMetadata {
2229-
// The declaration order matters because `mmap` should be dropped before `_temp_dir`.
2230-
mmap: Option<Mmap>,
2230+
// The declaration order matters because `full_mmap` should be dropped
2231+
// before `_temp_dir`.
2232+
full_mmap: Option<Mmap>,
2233+
reference: Option<Vec<u8>>,
22312234
// We need to carry MaybeTempDir to avoid deleting the temporary
22322235
// directory while accessing the Mmap.
22332236
_temp_dir: Option<MaybeTempDir>,
22342237
}
22352238

22362239
impl EncodedMetadata {
22372240
#[inline]
2238-
pub fn from_path(path: PathBuf, temp_dir: Option<MaybeTempDir>) -> std::io::Result<Self> {
2241+
pub fn from_path(
2242+
path: PathBuf,
2243+
reference_path: PathBuf,
2244+
temp_dir: Option<MaybeTempDir>,
2245+
) -> std::io::Result<Self> {
22392246
let file = std::fs::File::open(&path)?;
22402247
let file_metadata = file.metadata()?;
22412248
if file_metadata.len() == 0 {
2242-
return Ok(Self { mmap: None, _temp_dir: None });
2249+
return Ok(Self { full_mmap: None, reference: None, _temp_dir: None });
22432250
}
2244-
let mmap = unsafe { Some(Mmap::map(file)?) };
2245-
Ok(Self { mmap, _temp_dir: temp_dir })
2251+
let full_mmap = unsafe { Some(Mmap::map(file)?) };
2252+
2253+
let reference = std::fs::read(reference_path)?;
2254+
let reference = if reference.is_empty() { None } else { Some(reference) };
2255+
2256+
Ok(Self { full_mmap, reference, _temp_dir: temp_dir })
2257+
}
2258+
2259+
#[inline]
2260+
pub fn full(&self) -> &[u8] {
2261+
&self.full_mmap.as_deref().unwrap_or_default()
22462262
}
22472263

22482264
#[inline]
2249-
pub fn raw_data(&self) -> &[u8] {
2250-
self.mmap.as_deref().unwrap_or_default()
2265+
pub fn maybe_reference(&self) -> &[u8] {
2266+
self.reference.as_deref().unwrap_or(self.full())
22512267
}
22522268
}
22532269

22542270
impl<S: Encoder> Encodable<S> for EncodedMetadata {
22552271
fn encode(&self, s: &mut S) {
2256-
let slice = self.raw_data();
2272+
self.reference.encode(s);
2273+
2274+
let slice = self.full();
22572275
slice.encode(s)
22582276
}
22592277
}
22602278

22612279
impl<D: Decoder> Decodable<D> for EncodedMetadata {
22622280
fn decode(d: &mut D) -> Self {
2281+
let reference = <Option<Vec<u8>>>::decode(d);
2282+
22632283
let len = d.read_usize();
2264-
let mmap = if len > 0 {
2284+
let full_mmap = if len > 0 {
22652285
let mut mmap = MmapMut::map_anon(len).unwrap();
22662286
for _ in 0..len {
22672287
(&mut mmap[..]).write_all(&[d.read_u8()]).unwrap();
@@ -2272,11 +2292,11 @@ impl<D: Decoder> Decodable<D> for EncodedMetadata {
22722292
None
22732293
};
22742294

2275-
Self { mmap, _temp_dir: None }
2295+
Self { full_mmap, reference, _temp_dir: None }
22762296
}
22772297
}
22782298

2279-
pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
2299+
pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path, ref_path: &Path) {
22802300
let _prof_timer = tcx.prof.verbose_generic_activity("generate_crate_metadata");
22812301

22822302
// Since encoding metadata is not in a query, and nothing is cached,
@@ -2290,6 +2310,44 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
22902310
join(|| prefetch_mir(tcx), || tcx.exported_symbols(LOCAL_CRATE));
22912311
}
22922312

2313+
with_encode_metadata_header(tcx, path, |ecx| {
2314+
// Encode all the entries and extra information in the crate,
2315+
// culminating in the `CrateRoot` which points to all of it.
2316+
let root = ecx.encode_crate_root();
2317+
2318+
// Flush buffer to ensure backing file has the correct size.
2319+
ecx.opaque.flush();
2320+
// Record metadata size for self-profiling
2321+
tcx.prof.artifact_size(
2322+
"crate_metadata",
2323+
"crate_metadata",
2324+
ecx.opaque.file().metadata().unwrap().len(),
2325+
);
2326+
2327+
root.position.get()
2328+
});
2329+
2330+
if tcx.sess.opts.unstable_opts.split_metadata
2331+
&& !tcx.crate_types().contains(&CrateType::ProcMacro)
2332+
{
2333+
with_encode_metadata_header(tcx, ref_path, |ecx| {
2334+
let header: LazyValue<CrateHeader> = ecx.lazy(CrateHeader {
2335+
name: tcx.crate_name(LOCAL_CRATE),
2336+
triple: tcx.sess.opts.target_triple.clone(),
2337+
hash: tcx.crate_hash(LOCAL_CRATE),
2338+
is_proc_macro_crate: false,
2339+
is_reference: true,
2340+
});
2341+
header.position.get()
2342+
});
2343+
}
2344+
}
2345+
2346+
fn with_encode_metadata_header(
2347+
tcx: TyCtxt<'_>,
2348+
path: &Path,
2349+
f: impl FnOnce(&mut EncodeContext<'_, '_>) -> usize,
2350+
) {
22932351
let mut encoder = opaque::FileEncoder::new(path)
22942352
.unwrap_or_else(|err| tcx.dcx().emit_fatal(FailCreateFileEncoder { err }));
22952353
encoder.emit_raw_bytes(METADATA_HEADER);
@@ -2324,9 +2382,7 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
23242382
// Encode the rustc version string in a predictable location.
23252383
rustc_version(tcx.sess.cfg_version).encode(&mut ecx);
23262384

2327-
// Encode all the entries and extra information in the crate,
2328-
// culminating in the `CrateRoot` which points to all of it.
2329-
let root = ecx.encode_crate_root();
2385+
let root_position = f(&mut ecx);
23302386

23312387
// Make sure we report any errors from writing to the file.
23322388
// If we forget this, compilation can succeed with an incomplete rmeta file,
@@ -2336,12 +2392,9 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
23362392
}
23372393

23382394
let file = ecx.opaque.file();
2339-
if let Err(err) = encode_root_position(file, root.position.get()) {
2395+
if let Err(err) = encode_root_position(file, root_position) {
23402396
tcx.dcx().emit_fatal(FailWriteFile { path: ecx.opaque.path(), err });
23412397
}
2342-
2343-
// Record metadata size for self-profiling
2344-
tcx.prof.artifact_size("crate_metadata", "crate_metadata", file.metadata().unwrap().len());
23452398
}
23462399

23472400
fn encode_root_position(mut file: &File, pos: usize) -> Result<(), std::io::Error> {

compiler/rustc_metadata/src/rmeta/mod.rs

+4
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,10 @@ pub(crate) struct CrateHeader {
220220
/// This is separate from [`ProcMacroData`] to avoid having to update [`METADATA_VERSION`] every
221221
/// time ProcMacroData changes.
222222
pub(crate) is_proc_macro_crate: bool,
223+
/// Whether this header is a reference to a separate rmeta file.
224+
///
225+
/// This is used inside rlibs and dylibs when using `-Zsplit-metadata`.
226+
pub(crate) is_reference: bool,
223227
}
224228

225229
/// Serialized `.rmeta` data for a crate.

compiler/rustc_session/src/options.rs

+2
Original file line numberDiff line numberDiff line change
@@ -2128,6 +2128,8 @@ written to standard error output)"),
21282128
by the linker"),
21292129
split_lto_unit: Option<bool> = (None, parse_opt_bool, [TRACKED],
21302130
"enable LTO unit splitting (default: no)"),
2131+
split_metadata: bool = (false, parse_bool, [TRACKED],
2132+
"split metadata out of libraries into .rmeta files"),
21312133
src_hash_algorithm: Option<SourceFileHashAlgorithm> = (None, parse_src_file_hash, [TRACKED],
21322134
"hash algorithm of source files in debug info (`md5`, `sha1`, or `sha256`)"),
21332135
#[rustc_lint_opt_deny_field_access("use `Session::stack_protector` instead of this field")]

0 commit comments

Comments
 (0)