Skip to content

Commit 00c206f

Browse files
Remove old symbol naming code.
1 parent 9d5ce13 commit 00c206f

File tree

2 files changed

+89
-240
lines changed

2 files changed

+89
-240
lines changed

src/librustc_trans/back/link.rs

+1-236
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,9 @@ use session::search_paths::PathKind;
2222
use session::Session;
2323
use middle::cstore::{self, CrateStore, LinkMeta};
2424
use middle::cstore::{LinkagePreference, NativeLibraryKind};
25-
use middle::def_id::DefId;
2625
use middle::dependency_format::Linkage;
27-
use middle::ty::{Ty, TyCtxt};
28-
use rustc::front::map::DefPath;
29-
use trans::{CrateContext, CrateTranslation, gensym_name};
26+
use trans::CrateTranslation;
3027
use util::common::time;
31-
use util::sha2::{Digest, Sha256};
3228
use util::fs::fix_windows_verbatim_for_gcc;
3329
use rustc_back::tempdir::TempDir;
3430

@@ -38,16 +34,13 @@ use std::env;
3834
use std::ffi::OsString;
3935
use std::fs;
4036
use std::io::{self, Read, Write};
41-
use std::iter::once;
4237
use std::mem;
4338
use std::path::{Path, PathBuf};
4439
use std::process::Command;
4540
use std::str;
4641
use flate;
47-
use serialize::hex::ToHex;
4842
use syntax::ast;
4943
use syntax::codemap::Span;
50-
use syntax::parse::token::{self, InternedString};
5144
use syntax::attr::AttrMetaMethods;
5245

5346
use rustc_front::hir;
@@ -82,58 +75,6 @@ pub const RLIB_BYTECODE_OBJECT_V1_DATA_OFFSET: usize =
8275
RLIB_BYTECODE_OBJECT_V1_DATASIZE_OFFSET + 8;
8376

8477

85-
/*
86-
* Name mangling and its relationship to metadata. This is complex. Read
87-
* carefully.
88-
*
89-
* The semantic model of Rust linkage is, broadly, that "there's no global
90-
* namespace" between crates. Our aim is to preserve the illusion of this
91-
* model despite the fact that it's not *quite* possible to implement on
92-
* modern linkers. We initially didn't use system linkers at all, but have
93-
* been convinced of their utility.
94-
*
95-
* There are a few issues to handle:
96-
*
97-
* - Linkers operate on a flat namespace, so we have to flatten names.
98-
* We do this using the C++ namespace-mangling technique. Foo::bar
99-
* symbols and such.
100-
*
101-
* - Symbols with the same name but different types need to get different
102-
* linkage-names. We do this by hashing a string-encoding of the type into
103-
* a fixed-size (currently 16-byte hex) cryptographic hash function (CHF:
104-
* we use SHA256) to "prevent collisions". This is not airtight but 16 hex
105-
* digits on uniform probability means you're going to need 2**32 same-name
106-
* symbols in the same process before you're even hitting birthday-paradox
107-
* collision probability.
108-
*
109-
* - Symbols in different crates but with same names "within" the crate need
110-
* to get different linkage-names.
111-
*
112-
* - The hash shown in the filename needs to be predictable and stable for
113-
* build tooling integration. It also needs to be using a hash function
114-
* which is easy to use from Python, make, etc.
115-
*
116-
* So here is what we do:
117-
*
118-
* - Consider the package id; every crate has one (specified with crate_id
119-
* attribute). If a package id isn't provided explicitly, we infer a
120-
* versionless one from the output name. The version will end up being 0.0
121-
* in this case. CNAME and CVERS are taken from this package id. For
122-
* example, github.com/mozilla/CNAME#CVERS.
123-
*
124-
* - Define CMH as SHA256(crateid).
125-
*
126-
* - Define CMH8 as the first 8 characters of CMH.
127-
*
128-
* - Compile our crate to lib CNAME-CMH8-CVERS.so
129-
*
130-
* - Define STH(sym) as SHA256(CMH, type_str(sym))
131-
*
132-
* - Suffix a mangled sym with ::STH@CVERS, so that it is unique in the
133-
* name, non-name metadata, and type sense, and versioned in the way
134-
* system linkers understand.
135-
*/
136-
13778
pub fn find_crate_name(sess: Option<&Session>,
13879
attrs: &[ast::Attribute],
13980
input: &Input) -> String {
@@ -195,182 +136,6 @@ pub fn build_link_meta(sess: &Session,
195136
return r;
196137
}
197138

198-
fn truncated_hash_result(symbol_hasher: &mut Sha256) -> String {
199-
let output = symbol_hasher.result_bytes();
200-
// 64 bits should be enough to avoid collisions.
201-
output[.. 8].to_hex().to_string()
202-
}
203-
204-
/// Renders `did` as `<krate>:<index>`. The type context argument is unused.
pub fn def_to_string(_tcx: &TyCtxt, did: DefId) -> String {
    let index = did.index.as_usize();
    format!("{}:{}", did.krate, index)
}
207-
208-
// This calculates STH for a symbol, as defined above
209-
fn symbol_hash<'tcx>(tcx: &TyCtxt<'tcx>,
210-
symbol_hasher: &mut Sha256,
211-
t: Ty<'tcx>,
212-
link_meta: &LinkMeta)
213-
-> String {
214-
// NB: do *not* use abbrevs here as we want the symbol names
215-
// to be independent of one another in the crate.
216-
217-
symbol_hasher.reset();
218-
symbol_hasher.input_str(&link_meta.crate_name);
219-
symbol_hasher.input_str("-");
220-
symbol_hasher.input_str(link_meta.crate_hash.as_str());
221-
symbol_hasher.input_str(&tcx.sess.crate_disambiguator.borrow()[..]);
222-
symbol_hasher.input_str("-");
223-
symbol_hasher.input(&tcx.sess.cstore.encode_type(tcx, t, def_to_string));
224-
// Prefix with 'h' so that it never blends into adjacent digits
225-
let mut hash = String::from("h");
226-
hash.push_str(&truncated_hash_result(symbol_hasher));
227-
hash
228-
}
229-
230-
/// Returns the symbol hash for `t`, consulting the per-crate-context cache
/// (`type_hashcodes`) first and populating it on a miss.
fn get_symbol_hash<'a, 'tcx>(ccx: &CrateContext<'a, 'tcx>, t: Ty<'tcx>) -> String {
    // Cache hit: hand back a copy of the stored hash. The shared borrow ends
    // with this statement, before the mutable borrow further down.
    let cached = ccx.type_hashcodes().borrow().get(&t).map(|h| h.to_string());
    if let Some(known) = cached {
        return known;
    }

    // Cache miss: compute with the shared hasher, then remember the result.
    let mut hasher = ccx.symbol_hasher().borrow_mut();
    let fresh = symbol_hash(ccx.tcx(), &mut *hasher, t, ccx.link_meta());
    let memo = fresh.clone();
    ccx.type_hashcodes().borrow_mut().insert(t, memo);
    fresh
}
240-
241-
242-
// Name sanitization. LLVM will happily accept identifiers with weird names,
// but gas doesn't!
// gas accepts the following characters in symbols: a-z, A-Z, 0-9, ., _, $
//
// Returns a copy of `s` in which:
//   * `@ * & < > ( ) ,` become the mnemonic escapes `$SP$ $BP$ $RF$ $LT$ $GT$
//     $LP$ $RP$ $C$`;
//   * `-` and `:` become `.`;
//   * characters gas accepts are kept as they are;
//   * anything else becomes `$u<hex>$`, where `<hex>` is the code point as
//     printed by `char::escape_unicode`.
// A non-empty result that does not start with `_` or an ASCII letter gets a
// leading `_`. An empty input yields an empty string.
pub fn sanitize(s: &str) -> String {
    // Characters that gas accepts verbatim.
    fn is_gas_safe(c: char) -> bool {
        match c {
            '_' | '.' | '$' => true,
            _ => {
                ('a' <= c && c <= 'z') ||
                ('A' <= c && c <= 'Z') ||
                ('0' <= c && c <= '9')
            }
        }
    }

    let mut result = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            // Escape these with $ sequences
            '@' => result.push_str("$SP$"),
            '*' => result.push_str("$BP$"),
            '&' => result.push_str("$RF$"),
            '<' => result.push_str("$LT$"),
            '>' => result.push_str("$GT$"),
            '(' => result.push_str("$LP$"),
            ')' => result.push_str("$RP$"),
            ',' => result.push_str("$C$"),

            // '.' doesn't occur in types and functions, so reuse it
            // for ':' and '-'
            '-' | ':' => result.push('.'),

            // These are legal symbols
            c if is_gas_safe(c) => result.push(c),

            _ => {
                // `escape_unicode` yields `\u{XXXX}`: drop the backslash,
                // swallow '{', and turn '}' into the closing '$'.
                result.push('$');
                for e in c.escape_unicode().skip(1) {
                    match e {
                        '{' => {}
                        '}' => result.push('$'),
                        e => result.push(e),
                    }
                }
            }
        }
    }

    // Underscore-qualify anything that didn't start as an ident. Every byte
    // pushed above is ASCII, so the first byte can be tested directly; no
    // Unicode identifier predicate is needed.
    let needs_underscore = match result.as_bytes().first() {
        None => false,
        Some(&b) => {
            !(b == b'_' || (b'a' <= b && b <= b'z') || (b'A' <= b && b <= b'Z'))
        }
    };
    if needs_underscore {
        result.insert(0, '_');
    }

    result
}
291-
292-
pub fn mangle<PI: Iterator<Item=InternedString>>(path: PI, hash: Option<&str>) -> String {
293-
// Follow C++ namespace-mangling style, see
294-
// http://en.wikipedia.org/wiki/Name_mangling for more info.
295-
//
296-
// It turns out that on OSX you can actually have arbitrary symbols in
297-
// function names (at least when given to LLVM), but this is not possible
298-
// when using unix's linker. Perhaps one day when we just use a linker from LLVM
299-
// we won't need to do this name mangling. The problem with name mangling is
300-
// that it seriously limits the available characters. For example we can't
301-
// have things like &T in symbol names when one would theoretically
302-
// want them for things like impls of traits on that type.
303-
//
304-
// To be able to work on all platforms and get *some* reasonable output, we
305-
// use C++ name-mangling.
306-
307-
let mut n = String::from("_ZN"); // _Z == Begin name-sequence, N == nested
308-
309-
fn push(n: &mut String, s: &str) {
310-
let sani = sanitize(s);
311-
n.push_str(&format!("{}{}", sani.len(), sani));
312-
}
313-
314-
// First, connect each component with <len, name> pairs.
315-
for data in path {
316-
push(&mut n, &data);
317-
}
318-
319-
if let Some(s) = hash {
320-
push(&mut n, s)
321-
}
322-
323-
n.push('E'); // End name-sequence.
324-
n
325-
}
326-
327-
pub fn exported_name(path: DefPath, hash: &str) -> String {
328-
let path = path.into_iter()
329-
.map(|e| e.data.as_interned_str());
330-
mangle(path, Some(hash))
331-
}
332-
333-
pub fn mangle_exported_name<'a, 'tcx>(ccx: &CrateContext<'a, 'tcx>, path: DefPath,
334-
t: Ty<'tcx>, id: ast::NodeId) -> String {
335-
let mut hash = get_symbol_hash(ccx, t);
336-
337-
// Paths can be completely identical for different nodes,
338-
// e.g. `fn foo() { { fn a() {} } { fn a() {} } }`, so we
339-
// generate unique characters from the node id. For now
340-
// hopefully 3 characters is enough to avoid collisions.
341-
const EXTRA_CHARS: &'static str =
342-
"abcdefghijklmnopqrstuvwxyz\
343-
ABCDEFGHIJKLMNOPQRSTUVWXYZ\
344-
0123456789";
345-
let id = id as usize;
346-
let extra1 = id % EXTRA_CHARS.len();
347-
let id = id / EXTRA_CHARS.len();
348-
let extra2 = id % EXTRA_CHARS.len();
349-
let id = id / EXTRA_CHARS.len();
350-
let extra3 = id % EXTRA_CHARS.len();
351-
hash.push(EXTRA_CHARS.as_bytes()[extra1] as char);
352-
hash.push(EXTRA_CHARS.as_bytes()[extra2] as char);
353-
hash.push(EXTRA_CHARS.as_bytes()[extra3] as char);
354-
355-
exported_name(path, &hash[..])
356-
}
357-
358-
pub fn mangle_internal_name_by_type_and_seq<'a, 'tcx>(ccx: &CrateContext<'a, 'tcx>,
359-
t: Ty<'tcx>,
360-
name: &str) -> String {
361-
let path = [token::intern(&t.to_string()).as_str(), gensym_name(name).as_str()];
362-
let hash = get_symbol_hash(ccx, t);
363-
mangle(path.iter().cloned(), Some(&hash[..]))
364-
}
365-
366-
pub fn mangle_internal_name_by_path_and_seq(path: DefPath, flav: &str) -> String {
367-
let names =
368-
path.into_iter()
369-
.map(|e| e.data.as_interned_str())
370-
.chain(once(gensym_name(flav).as_str())); // append unique version of "flav"
371-
mangle(names, None)
372-
}
373-
374139
pub fn get_linker(sess: &Session) -> (String, Command) {
375140
if let Some(ref linker) = sess.opts.cg.linker {
376141
(linker.clone(), Command::new(linker))

src/librustc_trans/back/symbol_names.rs

+88-4
Original file line numberDiff line numberDiff line change
@@ -107,9 +107,8 @@ use rustc::front::map::definitions::DefPath;
107107

108108
use std::fmt::Write;
109109
use syntax::ast;
110-
use syntax::parse::token;
110+
use syntax::parse::token::{self, InternedString};
111111
use serialize::hex::ToHex;
112-
use super::link;
113112

114113
pub fn def_id_to_string<'tcx>(tcx: &ty::TyCtxt<'tcx>, def_id: DefId) -> String {
115114

@@ -206,7 +205,7 @@ fn exported_name_with_opt_suffix<'a, 'tcx>(ccx: &CrateContext<'a, 'tcx>,
206205
path.push(token::intern_and_get_ident(suffix));
207206
}
208207

209-
link::mangle(path.into_iter(), Some(&hash[..]))
208+
mangle(path.into_iter(), Some(&hash[..]))
210209
}
211210

212211
pub fn exported_name<'a, 'tcx>(ccx: &CrateContext<'a, 'tcx>,
@@ -233,5 +232,90 @@ pub fn internal_name_from_type_and_suffix<'a, 'tcx>(ccx: &CrateContext<'a, 'tcx>
233232
let path = [token::intern(&t.to_string()).as_str(),
234233
gensym_name(suffix).as_str()];
235234
let hash = get_symbol_hash(ccx, &Vec::new(), cstore::LOCAL_CRATE, &[t]);
236-
link::mangle(path.iter().cloned(), Some(&hash[..]))
235+
mangle(path.iter().cloned(), Some(&hash[..]))
236+
}
237+
238+
// Name sanitization. LLVM will happily accept identifiers with weird names,
// but gas doesn't!
// gas accepts the following characters in symbols: a-z, A-Z, 0-9, ., _, $
//
// Returns a copy of `s` in which:
//   * `@ * & < > ( ) ,` become the mnemonic escapes `$SP$ $BP$ $RF$ $LT$ $GT$
//     $LP$ $RP$ $C$`;
//   * `-` and `:` become `.`;
//   * characters gas accepts are kept as they are;
//   * anything else becomes `$u<hex>$`, where `<hex>` is the code point as
//     printed by `char::escape_unicode`.
// A non-empty result that does not start with `_` or an ASCII letter gets a
// leading `_`. An empty input yields an empty string.
pub fn sanitize(s: &str) -> String {
    // Characters that gas accepts verbatim.
    fn is_gas_safe(c: char) -> bool {
        match c {
            '_' | '.' | '$' => true,
            _ => {
                ('a' <= c && c <= 'z') ||
                ('A' <= c && c <= 'Z') ||
                ('0' <= c && c <= '9')
            }
        }
    }

    let mut result = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            // Escape these with $ sequences
            '@' => result.push_str("$SP$"),
            '*' => result.push_str("$BP$"),
            '&' => result.push_str("$RF$"),
            '<' => result.push_str("$LT$"),
            '>' => result.push_str("$GT$"),
            '(' => result.push_str("$LP$"),
            ')' => result.push_str("$RP$"),
            ',' => result.push_str("$C$"),

            // '.' doesn't occur in types and functions, so reuse it
            // for ':' and '-'
            '-' | ':' => result.push('.'),

            // These are legal symbols
            c if is_gas_safe(c) => result.push(c),

            _ => {
                // `escape_unicode` yields `\u{XXXX}`: drop the backslash,
                // swallow '{', and turn '}' into the closing '$'.
                result.push('$');
                for e in c.escape_unicode().skip(1) {
                    match e {
                        '{' => {}
                        '}' => result.push('$'),
                        e => result.push(e),
                    }
                }
            }
        }
    }

    // Underscore-qualify anything that didn't start as an ident. Every byte
    // pushed above is ASCII, so the first byte can be tested directly; no
    // Unicode identifier predicate is needed.
    let needs_underscore = match result.as_bytes().first() {
        None => false,
        Some(&b) => {
            !(b == b'_' || (b'a' <= b && b <= b'z') || (b'A' <= b && b <= b'Z'))
        }
    };
    if needs_underscore {
        result.insert(0, '_');
    }

    result
}
287+
288+
pub fn mangle<PI: Iterator<Item=InternedString>>(path: PI, hash: Option<&str>) -> String {
289+
// Follow C++ namespace-mangling style, see
290+
// http://en.wikipedia.org/wiki/Name_mangling for more info.
291+
//
292+
// It turns out that on OSX you can actually have arbitrary symbols in
293+
// function names (at least when given to LLVM), but this is not possible
294+
// when using unix's linker. Perhaps one day when we just use a linker from LLVM
295+
// we won't need to do this name mangling. The problem with name mangling is
296+
// that it seriously limits the available characters. For example we can't
297+
// have things like &T in symbol names when one would theoretically
298+
// want them for things like impls of traits on that type.
299+
//
300+
// To be able to work on all platforms and get *some* reasonable output, we
301+
// use C++ name-mangling.
302+
303+
let mut n = String::from("_ZN"); // _Z == Begin name-sequence, N == nested
304+
305+
fn push(n: &mut String, s: &str) {
306+
let sani = sanitize(s);
307+
n.push_str(&format!("{}{}", sani.len(), sani));
308+
}
309+
310+
// First, connect each component with <len, name> pairs.
311+
for data in path {
312+
push(&mut n, &data);
313+
}
314+
315+
if let Some(s) = hash {
316+
push(&mut n, s)
317+
}
318+
319+
n.push('E'); // End name-sequence.
320+
n
237321
}

0 commit comments

Comments
 (0)