Skip to content

Commit b92b9e1

Browse files
byrootjhawthorn
andcommitted
vm_getivar: assume the cached shape_id likely has a common ancestor
When an inline cache misses, it is very likely that the stale shape_id and the current instance shape_id have a close common ancestor. For example, if the object is sometimes frozen and sometimes not, one of the two shapes will be the direct parent of the other. Another pattern that commonly causes IC misses is "memoization"; in such cases the object will have a "base common shape" and then a number of close descendants. In addition, when we find a common ancestor, we store it in the inline cache instead of the current shape. This helps prevent the cache from flip-flopping, ensuring the next lookup will be marginally faster, and more generally avoids writing to memory too much. However, now that shapes have an ancestors index, we only check a few ancestors before falling back to using the index. So overall this change speeds up what is assumed to be the more common case, but makes what is assumed to be the less common case a bit slower. ``` compare-ruby: ruby 3.3.0dev (2023-10-26T05:30:17Z master 701ca07) [arm64-darwin22] built-ruby: ruby 3.3.0dev (2023-10-26T09:25:09Z shapes_double_sear.. a723a85235) [arm64-darwin22] warming up...... | |compare-ruby|built-ruby| |:------------------------------------|-----------:|---------:| |vm_ivar_stable_shape | 11.672M| 11.679M| | | -| 1.00x| |vm_ivar_memoize_unstable_shape | 7.551M| 10.506M| | | -| 1.39x| |vm_ivar_memoize_unstable_shape_miss | 11.591M| 11.624M| | | -| 1.00x| |vm_ivar_unstable_undef | 9.037M| 7.981M| | | 1.13x| -| |vm_ivar_divergent_shape | 8.034M| 6.657M| | | 1.21x| -| |vm_ivar_divergent_shape_imbalanced | 10.471M| 9.231M| | | 1.13x| -| ``` Co-Authored-By: John Hawthorn <[email protected]>
1 parent 0ae6a2a commit b92b9e1

File tree

4 files changed

+156
-7
lines changed

4 files changed

+156
-7
lines changed

benchmark/vm_ivar_memoize.yml

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
prelude: |
2+
IVARS = 60
3+
class Record
4+
def initialize(offset = false)
5+
@offset = 1 if offset
6+
@first = 0
7+
IVARS.times do |i|
8+
instance_variable_set("@ivar_#{i}", i)
9+
end
10+
end
11+
12+
def first
13+
@first
14+
end
15+
16+
def lazy_set
17+
@lazy_set ||= 123
18+
end
19+
20+
def undef
21+
@undef
22+
end
23+
end
24+
25+
Record.new # Need one alloc to right size
26+
27+
BASE = Record.new
28+
LAZY = Record.new
29+
LAZY.lazy_set
30+
31+
class Miss < Record
32+
@first = 0
33+
IVARS.times do |i|
34+
instance_variable_set("@i_#{i}", i)
35+
end
36+
end
37+
38+
Miss.new # Need one alloc to right size
39+
MISS = Miss.new
40+
41+
DIVERGENT = Record.new(true)
42+
43+
benchmark:
44+
vm_ivar_stable_shape: |
45+
BASE.first
46+
BASE.first
47+
BASE.first
48+
BASE.first
49+
BASE.first
50+
BASE.first
51+
vm_ivar_memoize_unstable_shape: |
52+
BASE.first
53+
LAZY.first
54+
BASE.first
55+
LAZY.first
56+
BASE.first
57+
LAZY.first
58+
vm_ivar_memoize_unstable_shape_miss: |
59+
BASE.first
60+
MISS.first
61+
BASE.first
62+
MISS.first
63+
BASE.first
64+
MISS.first
65+
vm_ivar_unstable_undef: |
66+
BASE.undef
67+
LAZY.undef
68+
BASE.undef
69+
LAZY.undef
70+
BASE.undef
71+
LAZY.undef
72+
vm_ivar_divergent_shape: |
73+
BASE.first
74+
DIVERGENT.first
75+
BASE.first
76+
DIVERGENT.first
77+
BASE.first
78+
DIVERGENT.first
79+
vm_ivar_divergent_shape_imbalanced: |
80+
BASE.first
81+
DIVERGENT.first
82+
DIVERGENT.first
83+
DIVERGENT.first
84+
DIVERGENT.first
85+
DIVERGENT.first

shape.c

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
#define SINGLE_CHILD(x) (rb_shape_t *)((uintptr_t)x & SINGLE_CHILD_MASK)
3939
#define ANCESTOR_CACHE_THRESHOLD 10
4040
#define MAX_SHAPE_ID (SHAPE_BUFFER_SIZE - 1)
41+
#define ANCESTOR_SEARCH_MAX_DEPTH 2
4142

4243
static ID id_frozen;
4344
static ID id_t_object;
@@ -723,6 +724,62 @@ rb_shape_transition_shape_capa(rb_shape_t* shape)
723724
return rb_shape_transition_shape_capa_create(shape, rb_malloc_grow_capa(shape->capacity, sizeof(VALUE)));
724725
}
725726

727+
// Same as rb_shape_get_iv_index, but uses a provided valid shape id and index
728+
// to return a result faster if branches of the shape tree are closely related.
//
// Parameters:
//   shape_id       id of the shape the search starts from.
//   id             ivar name, compared against each visited shape's edge_name.
//   value          in/out: read on entry as the index hint; on an early hit in
//                  the loop below it is overwritten with the resolved index,
//                  otherwise it is handed to rb_shape_get_iv_index.
//   shape_id_hint  in/out: shape id the index hint was recorded for. On return
//                  it holds either the id of the shape where the search stopped
//                  early, or shape_id when the regular lookup was used.
//
// Returns true on either early exit of the loop; otherwise returns whatever
// rb_shape_get_iv_index returns.
729+
bool
730+
rb_shape_get_iv_index_with_hint(shape_id_t shape_id, ID id, attr_index_t *value, shape_id_t *shape_id_hint)
731+
{
732+
// Capture the caller's index before *value can be overwritten.
attr_index_t index_hint = *value;
733+
rb_shape_t *shape = rb_shape_get_shape_by_id(shape_id);
734+
rb_shape_t *initial_shape = shape;
735+
736+
// No hint available: record shape_id as the hint and do a plain lookup.
if (*shape_id_hint == INVALID_SHAPE_ID) {
737+
*shape_id_hint = shape_id;
738+
return rb_shape_get_iv_index(shape, id, value);
739+
}
740+
741+
rb_shape_t * shape_hint = rb_shape_get_shape_by_id(*shape_id_hint);
742+
743+
// We assume it's likely shape_id_hint and shape_id have a close common
744+
// ancestor, so we check up to ANCESTOR_SEARCH_MAX_DEPTH ancestors before
745+
// eventually using the index, as in case of a match it will be faster.
746+
// However if the shape doesn't have an index, we walk the entire tree.
747+
int depth = INT_MAX;
748+
if (shape->ancestor_index && shape->next_iv_index >= ANCESTOR_CACHE_THRESHOLD) {
749+
depth = ANCESTOR_SEARCH_MAX_DEPTH;
750+
}
751+
752+
// Climb from `shape` towards the root while the depth budget lasts and the
// current shape's next_iv_index is still greater than the hinted index.
while (depth > 0 && shape->next_iv_index > index_hint) {
753+
// Bring the hint up to the same level: climb its parents until its
// next_iv_index is no greater than the current shape's.
while (shape_hint->next_iv_index > shape->next_iv_index) {
754+
shape_hint = rb_shape_get_parent(shape_hint);
755+
}
756+
757+
if (shape_hint == shape) {
758+
// We've found a common ancestor so use the index hint
759+
*value = index_hint;
760+
*shape_id_hint = rb_shape_id(shape);
761+
return true;
762+
}
763+
if (shape->edge_name == id) {
764+
// We found the matching id before a common ancestor
// NOTE(review): presumably next_iv_index is one past the slot added by
// this edge, hence the -1 -- confirm against rb_shape_get_iv_index.
765+
*value = shape->next_iv_index - 1;
766+
*shape_id_hint = rb_shape_id(shape);
767+
return true;
768+
}
769+
770+
shape = rb_shape_get_parent(shape);
771+
depth--;
772+
}
773+
774+
// If the original shape had an index but its ancestor doesn't
775+
// we switch back to the original one as it will be faster.
776+
if (!shape->ancestor_index && initial_shape->ancestor_index) {
777+
shape = initial_shape;
778+
}
779+
// The hinted search did not resolve: reset the hint to the starting shape_id
// and fall back to the regular lookup.
*shape_id_hint = shape_id;
780+
return rb_shape_get_iv_index(shape, id, value);
781+
}
782+
726783
bool
727784
rb_shape_get_iv_index(rb_shape_t * shape, ID id, attr_index_t *value)
728785
{

shape.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@ rb_shape_t* rb_shape_get_shape_by_id(shape_id_t shape_id);
155155
shape_id_t rb_shape_get_shape_id(VALUE obj);
156156
rb_shape_t * rb_shape_get_next_iv_shape(rb_shape_t * shape, ID id);
157157
bool rb_shape_get_iv_index(rb_shape_t * shape, ID id, attr_index_t * value);
158+
bool rb_shape_get_iv_index_with_hint(shape_id_t shape_id, ID id, attr_index_t * value, shape_id_t *shape_id_hint);
158159
bool rb_shape_obj_too_complex(VALUE obj);
159160

160161
void rb_shape_set_shape(VALUE obj, rb_shape_t* shape);

vm_insnhelper.c

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1286,8 +1286,6 @@ vm_getivar(VALUE obj, ID id, const rb_iseq_t *iseq, IVC ic, const struct rb_call
12861286
}
12871287
#endif
12881288

1289-
rb_shape_t *shape = rb_shape_get_shape_by_id(shape_id);
1290-
12911289
if (shape_id == OBJ_TOO_COMPLEX_SHAPE_ID) {
12921290
st_table *table = NULL;
12931291
switch (BUILTIN_TYPE(obj)) {
@@ -1314,14 +1312,22 @@ vm_getivar(VALUE obj, ID id, const rb_iseq_t *iseq, IVC ic, const struct rb_call
13141312
}
13151313
}
13161314
else {
1317-
if (rb_shape_get_iv_index(shape, id, &index)) {
1315+
shape_id_t previous_cached_id = cached_id;
1316+
if (rb_shape_get_iv_index_with_hint(shape_id, id, &index, &cached_id)) {
13181317
// This fills in the cache with the shared cache object.
13191318
// "ent" is the shared cache object
1320-
fill_ivar_cache(iseq, ic, cc, is_attr, index, shape_id);
1319+
if (cached_id != previous_cached_id) {
1320+
fill_ivar_cache(iseq, ic, cc, is_attr, index, cached_id);
1321+
}
13211322

1322-
// We fetched the ivar list above
1323-
val = ivar_list[index];
1324-
RUBY_ASSERT(!UNDEF_P(val));
1323+
if (index == ATTR_INDEX_NOT_SET) {
1324+
val = default_value;
1325+
}
1326+
else {
1327+
// We fetched the ivar list above
1328+
val = ivar_list[index];
1329+
RUBY_ASSERT(!UNDEF_P(val));
1330+
}
13251331
}
13261332
else {
13271333
if (is_attr) {

0 commit comments

Comments
 (0)