Skip to content

Commit a9eb1eb

Browse files
committed
Auto merge of #51394 - nnethercote:NCA-depths, r=nikomatsakis
Use scope tree depths to speed up `nearest_common_ancestor`. This patch adds depth markings to all entries in the `ScopeTree`'s `parent_map`. This change increases memory usage somewhat, but permits a much faster algorithm to be used: - If one scope has a greater depth than the other, the deeper scope is moved upward until they are at equal depths. - Then we move the two scopes upward in lockstep until they match. This avoids the need to keep track of which scopes have already been seen, which was the major part of the cost of the old algorithm. It also reduces the number of child-to-parent moves (which are hash table lookups) when the scopes start at different levels, because it never goes past the nearest common ancestor the way the old algorithm did. Finally, the case where one of the scopes is the root is now handled in advance, because that is moderately common and lets us skip everything. This change speeds up runs of several rust-perf benchmarks, the best by 6%. A selection of the bigger improvements: ``` clap-rs-check avg: -2.6% min: -6.6% max: 0.0% syn-check avg: -2.2% min: -5.0% max: 0.0% style-servo-check avg: -2.9%? min: -4.8%? max: 0.0%? cargo-check avg: -1.3% min: -2.8% max: 0.0% sentry-cli-check avg: -1.0% min: -2.1% max: 0.0% webrender-check avg: -0.9% min: -2.0% max: 0.0% style-servo avg: -0.9%? min: -1.8%? max: -0.0%? ripgrep-check avg: -0.7% min: -1.8% max: 0.1% clap-rs avg: -0.9% min: -1.6% max: -0.2% regex-check avg: -0.2% min: -1.3% max: 0.1% syn avg: -0.6% min: -1.3% max: 0.1% hyper-check avg: -0.5% min: -1.1% max: 0.0% ``` The idea came from multiple commenters on my blog and on Reddit. Thank you! r? @nikomatsakis
2 parents c131bdc + 5c36e01 commit a9eb1eb

File tree

2 files changed

+74
-63
lines changed

2 files changed

+74
-63
lines changed

src/librustc/middle/region.rs

+70-60
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ use ty;
2222

2323
use std::fmt;
2424
use std::mem;
25-
use rustc_data_structures::small_vec::SmallVec;
2625
use rustc_data_structures::sync::Lrc;
2726
use syntax::codemap;
2827
use syntax::ast;
@@ -280,6 +279,8 @@ impl Scope {
280279
}
281280
}
282281

282+
pub type ScopeDepth = u32;
283+
283284
/// The region scope tree encodes information about region relationships.
284285
#[derive(Default, Debug)]
285286
pub struct ScopeTree {
@@ -297,7 +298,7 @@ pub struct ScopeTree {
297298
/// conditional expression or repeating block. (Note that the
298299
/// enclosing scope id for the block associated with a closure is
299300
/// the closure itself.)
300-
parent_map: FxHashMap<Scope, Scope>,
301+
parent_map: FxHashMap<Scope, (Scope, ScopeDepth)>,
301302

302303
/// `var_map` maps from a variable or binding id to the block in
303304
/// which that variable is declared.
@@ -415,11 +416,12 @@ pub struct Context {
415416
/// details.
416417
root_id: Option<hir::ItemLocalId>,
417418

418-
/// the scope that contains any new variables declared
419-
var_parent: Option<Scope>,
419+
/// The scope that contains any new variables declared, plus its depth in
420+
/// the scope tree.
421+
var_parent: Option<(Scope, ScopeDepth)>,
420422

421-
/// region parent of expressions etc
422-
parent: Option<Scope>,
423+
/// Region parent of expressions, etc., plus its depth in the scope tree.
424+
parent: Option<(Scope, ScopeDepth)>,
423425
}
424426

425427
struct RegionResolutionVisitor<'a, 'tcx: 'a> {
@@ -499,7 +501,7 @@ impl<'tcx> Visitor<'tcx> for ExprLocatorVisitor {
499501
}
500502

501503
impl<'tcx> ScopeTree {
502-
pub fn record_scope_parent(&mut self, child: Scope, parent: Option<Scope>) {
504+
pub fn record_scope_parent(&mut self, child: Scope, parent: Option<(Scope, ScopeDepth)>) {
503505
debug!("{:?}.parent = {:?}", child, parent);
504506

505507
if let Some(p) = parent {
@@ -515,7 +517,7 @@ impl<'tcx> ScopeTree {
515517

516518
pub fn each_encl_scope<E>(&self, mut e:E) where E: FnMut(Scope, Scope) {
517519
for (&child, &parent) in &self.parent_map {
518-
e(child, parent)
520+
e(child, parent.0)
519521
}
520522
}
521523

@@ -558,7 +560,7 @@ impl<'tcx> ScopeTree {
558560

559561
pub fn opt_encl_scope(&self, id: Scope) -> Option<Scope> {
560562
//! Returns the narrowest scope that encloses `id`, if any.
561-
self.parent_map.get(&id).cloned()
563+
self.parent_map.get(&id).cloned().map(|(p, _)| p)
562564
}
563565

564566
#[allow(dead_code)] // used in cfg
@@ -590,7 +592,7 @@ impl<'tcx> ScopeTree {
590592
// returned.
591593
let mut id = Scope::Node(expr_id);
592594

593-
while let Some(&p) = self.parent_map.get(&id) {
595+
while let Some(&(p, _)) = self.parent_map.get(&id) {
594596
match p.data() {
595597
ScopeData::Destruction(..) => {
596598
debug!("temporary_scope({:?}) = {:?} [enclosing]",
@@ -658,57 +660,61 @@ impl<'tcx> ScopeTree {
658660
}
659661
}
660662

661-
/// Finds the nearest common ancestor (if any) of two scopes. That is, finds the smallest
662-
/// scope which is greater than or equal to both `scope_a` and `scope_b`.
663-
pub fn nearest_common_ancestor(&self,
664-
scope_a: Scope,
665-
scope_b: Scope)
666-
-> Scope {
663+
/// Finds the nearest common ancestor of two scopes. That is, finds the
664+
/// smallest scope which is greater than or equal to both `scope_a` and
665+
/// `scope_b`.
666+
pub fn nearest_common_ancestor(&self, scope_a: Scope, scope_b: Scope) -> Scope {
667667
if scope_a == scope_b { return scope_a; }
668668

669-
// Process the lists in tandem from the innermost scope, recording the
670-
// scopes seen so far. The first scope that comes up for a second time
671-
// is the nearest common ancestor.
672-
//
673-
// Note: another way to compute the nearest common ancestor is to get
674-
// the full scope chain for both scopes and then compare the chains to
675-
// find the first scope in a common tail. But getting a parent scope
676-
// requires a hash table lookup, and we often have very long scope
677-
// chains (10s or 100s of scopes) that only differ by a few elements at
678-
// the start. So this algorithm is faster.
679-
680-
let mut ma = Some(&scope_a);
681-
let mut mb = Some(&scope_b);
682-
683-
// A HashSet<Scope> is a more obvious choice for these, but SmallVec is
684-
// faster because the set size is normally small so linear search is
685-
// as good or better than a hash table lookup, plus the size is usually
686-
// small enough to avoid a heap allocation.
687-
let mut seen_a: SmallVec<[&Scope; 32]> = SmallVec::new();
688-
let mut seen_b: SmallVec<[&Scope; 32]> = SmallVec::new();
669+
let mut a = scope_a;
670+
let mut b = scope_b;
689671

690-
loop {
691-
if let Some(a) = ma {
692-
if seen_b.iter().any(|s| *s == a) {
693-
return *a;
694-
}
695-
seen_a.push(a);
696-
ma = self.parent_map.get(&a);
697-
}
672+
// Get the depth of each scope's parent. If either scope has no parent,
673+
// it must be the root, which means we can stop immediately because the
674+
// root must be the nearest common ancestor. (In practice, this is
675+
// moderately common.)
676+
let (parent_a, parent_a_depth) = match self.parent_map.get(&a) {
677+
Some(pd) => *pd,
678+
None => return a,
679+
};
680+
let (parent_b, parent_b_depth) = match self.parent_map.get(&b) {
681+
Some(pd) => *pd,
682+
None => return b,
683+
};
698684

699-
if let Some(b) = mb {
700-
if seen_a.iter().any(|s| *s == b) {
701-
return *b;
702-
}
703-
seen_b.push(b);
704-
mb = self.parent_map.get(&b);
685+
if parent_a_depth > parent_b_depth {
686+
// `a` is lower than `b`. Move `a` up until it's at the same depth
687+
// as `b`. The first move up is trivial because we already found
688+
// `parent_a` above; the loop does the remaining N-1 moves.
689+
a = parent_a;
690+
for _ in 0..(parent_a_depth - parent_b_depth - 1) {
691+
a = self.parent_map.get(&a).unwrap().0;
705692
}
706-
707-
if ma.is_none() && mb.is_none() {
708-
// No nearest common ancestor found.
709-
bug!();
693+
} else if parent_b_depth > parent_a_depth {
694+
// `b` is lower than `a`.
695+
b = parent_b;
696+
for _ in 0..(parent_b_depth - parent_a_depth - 1) {
697+
b = self.parent_map.get(&b).unwrap().0;
710698
}
699+
} else {
700+
// Both scopes are at the same depth, and we know they're not equal
701+
// because that case was tested for at the top of this function. So
702+
// we can trivially move them both up one level now.
703+
assert!(parent_a_depth != 0);
704+
a = parent_a;
705+
b = parent_b;
711706
}
707+
708+
// Now both scopes are at the same level. We move upwards in lockstep
709+
// until they match. In practice, this loop is almost always executed
710+
// zero times because `a` is almost always a direct ancestor of `b` or
711+
// vice versa.
712+
while a != b {
713+
a = self.parent_map.get(&a).unwrap().0;
714+
b = self.parent_map.get(&b).unwrap().0;
715+
};
716+
717+
a
712718
}
713719

714720
/// Assuming that the provided region was defined within this `ScopeTree`,
@@ -807,7 +813,7 @@ fn record_var_lifetime(visitor: &mut RegionResolutionVisitor,
807813
//
808814
// extern fn isalnum(c: c_int) -> c_int
809815
}
810-
Some(parent_scope) =>
816+
Some((parent_scope, _)) =>
811817
visitor.scope_tree.record_var_scope(var_id, parent_scope),
812818
}
813819
}
@@ -1019,7 +1025,7 @@ fn resolve_expr<'a, 'tcx>(visitor: &mut RegionResolutionVisitor<'a, 'tcx>, expr:
10191025
// Keep traversing up while we can.
10201026
match visitor.scope_tree.parent_map.get(&scope) {
10211027
// Don't cross from closure bodies to their parent.
1022-
Some(&superscope) => match superscope.data() {
1028+
Some(&(superscope, _)) => match superscope.data() {
10231029
ScopeData::CallSite(_) => break,
10241030
_ => scope = superscope
10251031
},
@@ -1036,7 +1042,7 @@ fn resolve_local<'a, 'tcx>(visitor: &mut RegionResolutionVisitor<'a, 'tcx>,
10361042
init: Option<&'tcx hir::Expr>) {
10371043
debug!("resolve_local(pat={:?}, init={:?})", pat, init);
10381044

1039-
let blk_scope = visitor.cx.var_parent;
1045+
let blk_scope = visitor.cx.var_parent.map(|(p, _)| p);
10401046

10411047
// As an exception to the normal rules governing temporary
10421048
// lifetimes, initializers in a let have a temporary lifetime
@@ -1261,16 +1267,20 @@ fn resolve_local<'a, 'tcx>(visitor: &mut RegionResolutionVisitor<'a, 'tcx>,
12611267

12621268
impl<'a, 'tcx> RegionResolutionVisitor<'a, 'tcx> {
12631269
/// Records the current parent (if any) as the parent of `child_scope`.
1264-
fn record_child_scope(&mut self, child_scope: Scope) {
1270+
/// Returns the depth of `child_scope`.
1271+
fn record_child_scope(&mut self, child_scope: Scope) -> ScopeDepth {
12651272
let parent = self.cx.parent;
12661273
self.scope_tree.record_scope_parent(child_scope, parent);
1274+
// If `child_scope` has no parent, it must be the root node, and so has
1275+
// a depth of 1. Otherwise, its depth is one more than its parent's.
1276+
parent.map_or(1, |(_p, d)| d + 1)
12671277
}
12681278

12691279
/// Records the current parent (if any) as the parent of `child_scope`,
12701280
/// and sets `child_scope` as the new current parent.
12711281
fn enter_scope(&mut self, child_scope: Scope) {
1272-
self.record_child_scope(child_scope);
1273-
self.cx.parent = Some(child_scope);
1282+
let child_depth = self.record_child_scope(child_scope);
1283+
self.cx.parent = Some((child_scope, child_depth));
12741284
}
12751285

12761286
fn enter_node_scope_with_dtor(&mut self, id: hir::ItemLocalId) {

src/librustc_driver/test.rs

+4-3
Original file line numberDiff line numberDiff line change
@@ -191,11 +191,12 @@ impl<'a, 'gcx, 'tcx> Env<'a, 'gcx, 'tcx> {
191191
self.infcx.tcx
192192
}
193193

194-
pub fn create_region_hierarchy(&mut self, rh: &RH, parent: region::Scope) {
194+
pub fn create_region_hierarchy(&mut self, rh: &RH,
195+
parent: (region::Scope, region::ScopeDepth)) {
195196
let me = region::Scope::Node(rh.id);
196197
self.region_scope_tree.record_scope_parent(me, Some(parent));
197198
for child_rh in rh.sub {
198-
self.create_region_hierarchy(child_rh, me);
199+
self.create_region_hierarchy(child_rh, (me, parent.1 + 1));
199200
}
200201
}
201202

@@ -215,7 +216,7 @@ impl<'a, 'gcx, 'tcx> Env<'a, 'gcx, 'tcx> {
215216
id: hir::ItemLocalId(11),
216217
sub: &[],
217218
}],
218-
}, dscope);
219+
}, (dscope, 1));
219220
}
220221

221222
#[allow(dead_code)] // this seems like it could be useful, even if we don't use it now

0 commit comments

Comments
 (0)