Skip to content

Split symbol interner into static unsynchronized and dynamic synchronized parts #79425

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -3913,6 +3913,7 @@ dependencies = [
name = "rustc_macros"
version = "0.1.0"
dependencies = [
"phf_codegen",
"proc-macro2",
"quote",
"syn",
Expand Down Expand Up @@ -4200,6 +4201,7 @@ version = "0.0.0"
dependencies = [
"cfg-if 0.1.10",
"md-5",
"phf",
"rustc_arena",
"rustc_data_structures",
"rustc_index",
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_macros/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ synstructure = "0.12.1"
syn = { version = "1", features = ["full"] }
proc-macro2 = "1"
quote = "1"
phf_codegen = "0.8"
92 changes: 57 additions & 35 deletions compiler/rustc_macros/src/symbols.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,10 +124,6 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec<syn::Error>) {
}
};

let mut keyword_stream = quote! {};
let mut symbols_stream = quote! {};
let mut prefill_stream = quote! {};
let mut counter = 0u32;
let mut keys =
HashMap::<String, Span>::with_capacity(input.keywords.len() + input.symbols.len() + 10);
let mut prev_key: Option<(Span, String)> = None;
Expand All @@ -136,8 +132,10 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec<syn::Error>) {
if let Some(prev_span) = keys.get(str) {
errors.error(span, format!("Symbol `{}` is duplicated", str));
errors.error(*prev_span, format!("location of previous definition"));
Err(())
} else {
keys.insert(str.to_string(), span);
Ok(())
}
};

Expand All @@ -151,51 +149,75 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec<syn::Error>) {
prev_key = Some((span, str.to_string()));
};

let mut symbol_strings: Vec<String> = Vec::new();

// Generate the listed keywords.
let mut keyword_stream = quote! {};
for keyword in input.keywords.iter() {
let name = &keyword.name;
let value = &keyword.value;
let value_string = value.value();
check_dup(keyword.name.span(), &value_string, &mut errors);
prefill_stream.extend(quote! {
#value,
});
let symbol_index = symbol_strings.len() as u32;
if check_dup(keyword.name.span(), &value_string, &mut errors).is_ok() {
// Only add an entry to `symbol_strings` if it is not a duplicate.
// If it is a duplicate, then compilation will fail. However, we still
// want to avoid panicking, if a duplicate is detected.
symbol_strings.push(value_string);
}
keyword_stream.extend(quote! {
pub const #name: Symbol = Symbol::new(#counter);
pub const #name: Symbol = Symbol::new(#symbol_index);
});
counter += 1;
}

// Generate symbols for the strings "0", "1", ..., "9".
let digits_base = symbol_strings.len() as u32;
for n in 0..10 {
let n_string = n.to_string();
if check_dup(Span::call_site(), &n_string, &mut errors).is_ok() {
symbol_strings.push(n_string);
}
}

// Generate the listed symbols.
let mut symbols_stream = quote! {};
for symbol in input.symbols.iter() {
let name = &symbol.name;
let name_string = symbol.name.to_string();
check_order(symbol.name.span(), &name_string, &mut errors);
let value = match &symbol.value {
Some(value) => value.value(),
None => name.to_string(),
None => name_string,
};
check_dup(symbol.name.span(), &value, &mut errors);
check_order(symbol.name.span(), &name.to_string(), &mut errors);

prefill_stream.extend(quote! {
#value,
});
let symbol_index = symbol_strings.len() as u32;
if check_dup(symbol.name.span(), &value, &mut errors).is_ok() {
// Only add an entry to `symbol_strings` if it is not a duplicate.
// If it is a duplicate, then compilation will fail. However, we still
// want to avoid panicking, if a duplicate is detected.
symbol_strings.push(value);
}

symbols_stream.extend(quote! {
pub const #name: Symbol = Symbol::new(#counter);
pub const #name: Symbol = Symbol::new(#symbol_index);
});
counter += 1;
}

// Generate symbols for the strings "0", "1", ..., "9".
let digits_base = counter;
counter += 10;
for n in 0..10 {
let n = n.to_string();
check_dup(Span::call_site(), &n, &mut errors);
prefill_stream.extend(quote! {
#n,
});
// We have finished collecting symbol strings.
let static_symbols_len = symbol_strings.len();
let dynamic_symbol_base = symbol_strings.len() as u32;
let symbol_strings = symbol_strings; // no more mutation

// Build the body of STATIC_SYMBOLS.
let symbol_strings_tokens: TokenStream = symbol_strings.iter().map(|s| quote!(#s,)).collect();

// Build the PHF map. This translates from strings to Symbol values.
let mut phf_map = phf_codegen::Map::<&str>::new();
for (symbol_index, symbol) in symbol_strings.iter().enumerate() {
phf_map.entry(symbol, format!("Symbol::new({})", symbol_index as u32).as_str());
}
let _ = counter; // for future use
let phf_map_built = phf_map.build();
let phf_map_text = phf_map_built.to_string();
let phf_map_expr = syn::parse_str::<syn::Expr>(&phf_map_text).unwrap();

let output = quote! {
const SYMBOL_DIGITS_BASE: u32 = #digits_base;
Expand All @@ -215,13 +237,13 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec<syn::Error>) {
#symbols_stream
}

impl Interner {
pub fn fresh() -> Self {
Interner::prefill(&[
#prefill_stream
])
}
}
const DYNAMIC_SYMBOL_BASE: u32 = #dynamic_symbol_base;

static STATIC_SYMBOLS: [&str; #static_symbols_len as usize] = [
#symbol_strings_tokens
];

static STATIC_SYMBOLS_PHF: ::phf::Map<&'static str, Symbol> = #phf_map_expr;
};

(output, errors.list)
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_span/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,4 @@ tracing = "0.1"
sha-1 = "0.9"
sha2 = "0.9"
md-5 = "0.9"
phf = "0.8"
2 changes: 1 addition & 1 deletion compiler/rustc_span/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ pub struct SessionGlobals {
impl SessionGlobals {
pub fn new(edition: Edition) -> SessionGlobals {
SessionGlobals {
symbol_interner: Lock::new(symbol::Interner::fresh()),
symbol_interner: Lock::new(symbol::Interner::default()),
span_interner: Lock::new(span_encoding::SpanInterner::default()),
hygiene_data: Lock::new(hygiene::HygieneData::new(edition)),
source_map: Lock::new(None),
Expand Down
87 changes: 74 additions & 13 deletions compiler/rustc_span/src/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1452,17 +1452,55 @@ impl Symbol {
Symbol(SymbolIndex::from_u32(n))
}

/// Maps a string to its interned representation, but only if this string is a known
/// (static) symbol.
pub fn intern_static(string: &str) -> Option<Symbol> {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
pub fn intern_static(string: &str) -> Option<Symbol> {
fn intern_static(string: &str) -> Option<Symbol> {

if let Some(symbol) = STATIC_SYMBOLS_PHF.get(string) { Some(*symbol) } else { None }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if let Some(symbol) = STATIC_SYMBOLS_PHF.get(string) { Some(*symbol) } else { None }
STATIC_SYMBOLS_PHF.get(string).copied()

}

/// Maps a string to its interned representation.
// #[inline(never)] - There is no benefit to inlining this function (verified with
// performance measurements), and a reduction in overall code size by disabling inlining.
#[inline(never)]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this necessary?

pub fn intern(string: &str) -> Self {
with_interner(|interner| interner.intern(string))
if let Some(symbol) = Symbol::intern_static(string) {
symbol
} else {
with_interner(|interner| interner.intern_dynamic(string))
}
}

pub fn is_static(self) -> bool {
self.0.as_u32() < DYNAMIC_SYMBOL_BASE
}

/// Translates the `Symbol` to a string, but only if this `Symbol`
/// was originally interned as a static symbol.
pub fn as_str_static(self) -> Option<&'static str> {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
pub fn as_str_static(self) -> Option<&'static str> {
fn as_str_static(self) -> Option<&'static str> {

let symbol_index = self.0.as_usize();
if symbol_index < STATIC_SYMBOLS.len() {
// This is a well-known symbol. The symbol string is stored in a static field.
// There is no need to lock the interner.
Some(STATIC_SYMBOLS[symbol_index])
} else {
None
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
}
STATIC_SYMBOLS.get(self.0.as_usize()).copied()

}

/// Convert to a `SymbolStr`. This is a slowish operation because it
/// requires locking the symbol interner.
///
/// If the symbol is a statically-interned symbol (interned at rustc compile time),
/// then this operation is fast, and does not acquire any locks.
pub fn as_str(self) -> SymbolStr {
with_interner(|interner| unsafe {
SymbolStr { string: std::mem::transmute::<&str, &str>(interner.get(self)) }
})
if let Some(string) = self.as_str_static() {
SymbolStr { string }
} else {
// This is a dynamic string. The string is stored in the Interner.
with_interner(|interner| unsafe {
SymbolStr { string: std::mem::transmute::<&str, &str>(interner.get_dynamic(self)) }
})
}
}

pub fn as_u32(self) -> u32 {
Expand Down Expand Up @@ -1528,6 +1566,13 @@ impl<CTX> ToStableHashKey<CTX> for Symbol {
// The `FxHashMap`+`Vec` pair could be replaced by `FxIndexSet`, but #75278
// found that to regress performance up to 2% in some cases. This might be
// revisited after further improvements to `indexmap`.
//
// `Interner` does not contain any of the statically-known symbol names.
// It does not contain any of the strings defined in the `Keyword` or
// `Symbol` sections. Since those strings are statically-known, we just
// look them up in a (static) table, when needed. See
// `STATIC_SYMBOLS` and `STATIC_SYMBOLS_PHF`, which are both generated by
// `compiler/rustc_macros/src/symbols.rs`.
#[derive(Default)]
pub struct Interner {
arena: DroplessArena,
Expand All @@ -1536,21 +1581,28 @@ pub struct Interner {
}

impl Interner {
fn prefill(init: &[&'static str]) -> Self {
Interner {
strings: init.into(),
names: init.iter().copied().zip((0..).map(Symbol::new)).collect(),
..Default::default()
pub fn intern(&mut self, string: &str) -> Symbol {
if let Some(sym) = Symbol::intern_static(string) {
sym
} else {
self.intern_dynamic(string)
}
}

#[inline]
pub fn intern(&mut self, string: &str) -> Symbol {
fn intern_dynamic(&mut self, string: &str) -> Symbol {
// The caller should have already checked for static symbols.
// Failure to do so is a bug, since this code will mistakenly
// intern the static symbol, resulting in a bogus symbol index.
// (The whole point of this design is that you can do static
// lookups without acquiring the thread-local Interner, so if
// we got here with a static symbol, we goofed.)
debug_assert!(Symbol::intern_static(string).is_none());

if let Some(&name) = self.names.get(string) {
return name;
}

let name = Symbol::new(self.strings.len() as u32);
let name = Symbol::new(DYNAMIC_SYMBOL_BASE + self.strings.len() as u32);

// `from_utf8_unchecked` is safe since we just allocated a `&str` which is known to be
// UTF-8.
Expand All @@ -1567,7 +1619,16 @@ impl Interner {
// Get the symbol as a string. `Symbol::as_str()` should be used in
// preference to this function.
pub fn get(&self, symbol: Symbol) -> &str {
self.strings[symbol.0.as_usize()]
if let Some(string) = symbol.as_str_static() {
string
} else {
&self.strings[(symbol.as_u32() - DYNAMIC_SYMBOL_BASE) as usize]
}
}

fn get_dynamic(&self, symbol: Symbol) -> &str {
debug_assert!(!symbol.is_static());
self.strings[(symbol.as_u32() - DYNAMIC_SYMBOL_BASE) as usize]
}
}

Expand Down
78 changes: 71 additions & 7 deletions compiler/rustc_span/src/symbol/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,14 @@ use crate::{edition, SessionGlobals};
#[test]
fn interner_tests() {
let mut i: Interner = Interner::default();
// first one is zero:
assert_eq!(i.intern("dog"), Symbol::new(0));
let dog = i.intern("dog");
// re-use gets the same entry:
assert_eq!(i.intern("dog"), Symbol::new(0));
assert_eq!(i.intern("dog").as_u32(), dog.as_u32());
// different string gets a different #:
assert_eq!(i.intern("cat"), Symbol::new(1));
assert_eq!(i.intern("cat"), Symbol::new(1));
// dog is still at zero
assert_eq!(i.intern("dog"), Symbol::new(0));
let cat = i.intern("cat");
assert_ne!(dog.as_u32(), cat.as_u32());
assert_eq!(i.intern("cat").as_u32(), cat.as_u32());
assert_eq!(i.intern("dog").as_u32(), dog.as_u32());
}

#[test]
Expand All @@ -23,3 +22,68 @@ fn without_first_quote_test() {
assert_eq!(i.without_first_quote().name, kw::Break);
});
}

#[test]
fn test_static_symbols() {
assert_eq!(Symbol::intern_static(""), Some(kw::Invalid));
assert_eq!(Symbol::intern_static("not in the static table"), None);
assert!(Symbol::intern_static("fn").is_some()); // don't care about exact index

// check round-tripping
for &string in ["as", "fn", "let", "trait", "size_of_val"].iter() {
let sym = Symbol::intern_static(string).unwrap();
assert_eq!(string, &*sym.as_str(), "sym #{}", sym.0.as_u32());
}
}

#[test]
fn test_ident_is_special() {
for &s in [kw::Invalid, kw::PathRoot, kw::DollarCrate, kw::Underscore].iter() {
let ident = Ident::with_dummy_span(s);
assert_eq!(ident.is_special(), true, "s = {:?}", s);
}

for &s in [kw::As, kw::Break, kw::UnderscoreLifetime].iter() {
let ident = Ident::with_dummy_span(s);
assert_eq!(ident.is_special(), false, "s = {:?}", s);
}
}

#[test]
fn test_symbol_as_str() {
SESSION_GLOBALS.set(&SessionGlobals::new(edition::Edition::Edition2018), || {
for &(sym, string) in [
(kw::Invalid, ""),
(kw::PathRoot, "{{root}}"),
(kw::DollarCrate, "$crate"),
(kw::As, "as"),
(kw::Break, "break"),
(kw::While, "while"),
(kw::Union, "union"),
(sym::Alignment, "Alignment"),
(sym::Arc, "Arc"),
(sym::zmm_reg, "zmm_reg"),
(sym::i64, "i64"),
]
.iter()
{
let as_str = sym.as_str();
assert_eq!(&*as_str, string);

let sym2 = Symbol::intern(string);
assert_eq!(sym, sym2, "sym={} sym2={}", sym.as_u32(), sym2.as_u32());
}

let colon = Symbol::intern(":");
assert_eq!(&*colon.as_str(), ":");
});
}

#[test]
fn test_dynamic_symbols() {
crate::with_session_globals(crate::edition::Edition::Edition2018, || {
let s1 = Symbol::intern("fuzzy wuzzy");
assert!(!s1.is_static());
assert_eq!(&*s1.as_str(), "fuzzy wuzzy");
});
}
Loading