-
Notifications
You must be signed in to change notification settings - Fork 13.3k
Split symbol interner into static unsynchronized and dynamic synchronized parts #79425
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,3 +20,4 @@ tracing = "0.1" | |
sha-1 = "0.9" | ||
sha2 = "0.9" | ||
md-5 = "0.9" | ||
phf = "0.8" |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -1452,17 +1452,55 @@ impl Symbol { | |||||
Symbol(SymbolIndex::from_u32(n)) | ||||||
} | ||||||
|
||||||
/// Maps a string to its interned representation, but only if this string is a known | ||||||
/// (static) symbol. | ||||||
pub fn intern_static(string: &str) -> Option<Symbol> { | ||||||
if let Some(symbol) = STATIC_SYMBOLS_PHF.get(string) { Some(*symbol) } else { None } | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
} | ||||||
|
||||||
/// Maps a string to its interned representation. | ||||||
// `#[inline(never)]`: inlining this function brings no benefit (verified with | ||||||
// performance measurements), and disabling inlining reduces overall code size. | ||||||
#[inline(never)] | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why is this necessary? |
||||||
pub fn intern(string: &str) -> Self { | ||||||
with_interner(|interner| interner.intern(string)) | ||||||
if let Some(symbol) = Symbol::intern_static(string) { | ||||||
symbol | ||||||
} else { | ||||||
with_interner(|interner| interner.intern_dynamic(string)) | ||||||
} | ||||||
} | ||||||
|
||||||
pub fn is_static(self) -> bool { | ||||||
self.0.as_u32() < DYNAMIC_SYMBOL_BASE | ||||||
} | ||||||
|
||||||
/// Translates the `Symbol` to a string, but only if this `Symbol` | ||||||
/// was originally interned as a static symbol. | ||||||
pub fn as_str_static(self) -> Option<&'static str> { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
let symbol_index = self.0.as_usize(); | ||||||
if symbol_index < STATIC_SYMBOLS.len() { | ||||||
// This is a well-known symbol. The symbol string is stored in a static field. | ||||||
// There is no need to lock the interner. | ||||||
Some(STATIC_SYMBOLS[symbol_index]) | ||||||
} else { | ||||||
None | ||||||
} | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
} | ||||||
|
||||||
/// Convert to a `SymbolStr`. For a dynamically-interned symbol this is a slowish | ||||||
/// operation because it requires locking the symbol interner. | ||||||
/// | ||||||
/// If the symbol is a statically-interned symbol (interned at rustc compile time), | ||||||
/// then this operation is fast, and does not acquire any locks. | ||||||
pub fn as_str(self) -> SymbolStr { | ||||||
with_interner(|interner| unsafe { | ||||||
SymbolStr { string: std::mem::transmute::<&str, &str>(interner.get(self)) } | ||||||
}) | ||||||
if let Some(string) = self.as_str_static() { | ||||||
SymbolStr { string } | ||||||
} else { | ||||||
// This is a dynamic string. The string is stored in the Interner. | ||||||
with_interner(|interner| unsafe { | ||||||
SymbolStr { string: std::mem::transmute::<&str, &str>(interner.get_dynamic(self)) } | ||||||
}) | ||||||
} | ||||||
} | ||||||
|
||||||
pub fn as_u32(self) -> u32 { | ||||||
|
@@ -1528,6 +1566,13 @@ impl<CTX> ToStableHashKey<CTX> for Symbol { | |||||
// The `FxHashMap`+`Vec` pair could be replaced by `FxIndexSet`, but #75278 | ||||||
// found that to regress performance up to 2% in some cases. This might be | ||||||
// revisited after further improvements to `indexmap`. | ||||||
// | ||||||
// `Interner` does not contain any of the statically-known symbol names. | ||||||
// It does not contain any of the strings defined in the `Keyword` or | ||||||
// `Symbol` sections. Since those strings are statically-known, we just | ||||||
// look them up in a (static) table, when needed. See | ||||||
// `STATIC_SYMBOLS` and `STATIC_SYMBOLS_PHF`, which are both generated by | ||||||
// `compiler/rustc_macros/src/symbols.rs`. | ||||||
#[derive(Default)] | ||||||
pub struct Interner { | ||||||
arena: DroplessArena, | ||||||
|
@@ -1536,21 +1581,28 @@ pub struct Interner { | |||||
} | ||||||
|
||||||
impl Interner { | ||||||
fn prefill(init: &[&'static str]) -> Self { | ||||||
Interner { | ||||||
strings: init.into(), | ||||||
names: init.iter().copied().zip((0..).map(Symbol::new)).collect(), | ||||||
..Default::default() | ||||||
pub fn intern(&mut self, string: &str) -> Symbol { | ||||||
if let Some(sym) = Symbol::intern_static(string) { | ||||||
sym | ||||||
} else { | ||||||
self.intern_dynamic(string) | ||||||
} | ||||||
} | ||||||
|
||||||
#[inline] | ||||||
pub fn intern(&mut self, string: &str) -> Symbol { | ||||||
fn intern_dynamic(&mut self, string: &str) -> Symbol { | ||||||
// The caller should have already checked for static symbols. | ||||||
// Failure to do so is a bug, since this code will mistakenly | ||||||
// intern the static symbol, resulting in a bogus symbol index. | ||||||
// (The whole point of this design is that you can do static | ||||||
// lookups without acquiring the thread-local Interner, so if | ||||||
// we got here with a static symbol, we goofed.) | ||||||
debug_assert!(Symbol::intern_static(string).is_none()); | ||||||
|
||||||
if let Some(&name) = self.names.get(string) { | ||||||
return name; | ||||||
} | ||||||
|
||||||
let name = Symbol::new(self.strings.len() as u32); | ||||||
let name = Symbol::new(DYNAMIC_SYMBOL_BASE + self.strings.len() as u32); | ||||||
|
||||||
// `from_utf8_unchecked` is safe since we just allocated a `&str` which is known to be | ||||||
// UTF-8. | ||||||
|
@@ -1567,7 +1619,16 @@ impl Interner { | |||||
// Get the symbol as a string. `Symbol::as_str()` should be used in | ||||||
// preference to this function. | ||||||
pub fn get(&self, symbol: Symbol) -> &str { | ||||||
self.strings[symbol.0.as_usize()] | ||||||
if let Some(string) = symbol.as_str_static() { | ||||||
string | ||||||
} else { | ||||||
&self.strings[(symbol.as_u32() - DYNAMIC_SYMBOL_BASE) as usize] | ||||||
} | ||||||
} | ||||||
|
||||||
fn get_dynamic(&self, symbol: Symbol) -> &str { | ||||||
debug_assert!(!symbol.is_static()); | ||||||
self.strings[(symbol.as_u32() - DYNAMIC_SYMBOL_BASE) as usize] | ||||||
} | ||||||
} | ||||||
|
||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.