Skip to content

Commit 25f98bc

Browse files
mkustermann <commit-bot@chromium.org>
authored and committed
[VM] Introduction of type testing stubs - Part 3
The changes include: * Make AssertAssignableInstr no longer have a call-summary, which helps methods with several parameter checks by not having to re-load/re-initialize type arguments registers * Lazily create SubtypeTestCaches: We already go to runtime to warm up the caches, so we now also create the caches on the first runtime call and patch the pool entries. * No longer load the destination name into a register: We only need the name when we throw an exception, so it is not on the hot path. Instead we let the runtime look at the call site, decoding a pool index from the instructions stream. The destination name will be available in the pool, at a consecutive index to the subtype cache. * Remove the fall-through to N=1 case for probing subtyping tests, since those will always be handled by the optimized stubs. * Do not generate optimized stubs for FutureOr<T> (so far it just fell through to TTS). We can make an optimized version of that later, but it requires special subtyping rules. * Local code quality improvement in the type-testing-stubs: Avoid extra jump at last case of cid-class-range checks. There are still a number of optimization opportunities we can do in future changes. Issue #31798 Change-Id: I4dc5a8a49f939178fe74d44736ef69e4b9088e46 Reviewed-on: https://dart-review.googlesource.com/46984 Reviewed-by: Vyacheslav Egorov <[email protected]> Reviewed-by: Régis Crelier <[email protected]>
1 parent f226c22 commit 25f98bc

24 files changed

+491
-152
lines changed

runtime/vm/code_patcher.cc

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,4 +28,15 @@ WritableInstructionsScope::~WritableInstructionsScope() {
2828
}
2929
}
3030

31+
bool MatchesPattern(uword addr, int16_t* pattern, intptr_t size) {
32+
uint8_t* bytes = reinterpret_cast<uint8_t*>(addr);
33+
for (intptr_t i = 0; i < size; i++) {
34+
int16_t val = pattern[i];
35+
if ((val >= 0) && (val != bytes[i])) {
36+
return false;
37+
}
38+
}
39+
return true;
40+
}
41+
3142
} // namespace dart

runtime/vm/code_patcher.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,8 +102,16 @@ class CodePatcher : public AllStatic {
102102
NativeFunction target,
103103
const Code& trampoline);
104104
#endif
105+
106+
static intptr_t GetSubtypeTestCachePoolIndex(uword return_address);
105107
};
106108

109+
// Beginning from [addr] we compare [size] bytes with [pattern]. All [0..255]
110+
// values in [pattern] have to match, negative values are skipped.
111+
//
112+
// Example pattern: `[0x3d, 0x8b, -1, -1]`.
113+
bool MatchesPattern(uword addr, int16_t* pattern, intptr_t size);
114+
107115
} // namespace dart
108116

109117
#endif // RUNTIME_VM_CODE_PATCHER_H_

runtime/vm/code_patcher_x64.cc

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,17 +16,6 @@
1616

1717
namespace dart {
1818

19-
static bool MatchesPattern(uword addr, int16_t* pattern, intptr_t size) {
20-
uint8_t* bytes = reinterpret_cast<uint8_t*>(addr);
21-
for (intptr_t i = 0; i < size; i++) {
22-
int16_t val = pattern[i];
23-
if ((val >= 0) && (val != bytes[i])) {
24-
return false;
25-
}
26-
}
27-
return true;
28-
}
29-
3019
intptr_t IndexFromPPLoad(uword start) {
3120
int32_t offset = *reinterpret_cast<int32_t*>(start);
3221
return ObjectPool::IndexFromOffset(offset);

runtime/vm/compiler/backend/flow_graph_compiler.cc

Lines changed: 24 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1214,7 +1214,7 @@ bool FlowGraphCompiler::NeedsEdgeCounter(TargetEntryInstr* block) {
12141214
(block == flow_graph().graph_entry()->normal_entry())));
12151215
}
12161216

1217-
// Allocate a register that is not explicitly blocked.
1217+
// Allocate a register that is not explictly blocked.
12181218
static Register AllocateFreeRegister(bool* blocked_registers) {
12191219
for (intptr_t regno = 0; regno < kNumberOfCpuRegisters; regno++) {
12201220
if (!blocked_registers[regno]) {
@@ -1879,18 +1879,32 @@ bool FlowGraphCompiler::GenerateSubtypeRangeCheck(Register class_id_reg,
18791879
void FlowGraphCompiler::GenerateCidRangesCheck(Assembler* assembler,
18801880
Register class_id_reg,
18811881
const CidRangeVector& cid_ranges,
1882-
Label* is_subtype) {
1883-
Label fail;
1882+
Label* inside_range_lbl,
1883+
Label* outside_range_lbl,
1884+
bool fall_through_if_inside) {
1885+
// If there are no valid class ranges, the check will fail. If we are
1886+
// supposed to fall-through in the positive case, we'll explicitly jump to
1887+
// the [outside_range_lbl].
1888+
if (cid_ranges.length() == 1 && cid_ranges[0].IsIllegalRange()) {
1889+
if (fall_through_if_inside) {
1890+
assembler->Jump(outside_range_lbl);
1891+
}
1892+
return;
1893+
}
1894+
18841895
int bias = 0;
18851896
for (intptr_t i = 0; i < cid_ranges.length(); ++i) {
18861897
const CidRange& range = cid_ranges[i];
1887-
if (!range.IsIllegalRange()) {
1888-
bias = EmitTestAndCallCheckCid(assembler, is_subtype, class_id_reg, range,
1889-
bias,
1890-
/*jump_on_miss=*/false);
1891-
}
1898+
RELEASE_ASSERT(!range.IsIllegalRange());
1899+
const bool last_round = i == (cid_ranges.length() - 1);
1900+
1901+
Label* jump_label = last_round && fall_through_if_inside ? outside_range_lbl
1902+
: inside_range_lbl;
1903+
const bool jump_on_miss = last_round && fall_through_if_inside;
1904+
1905+
bias = EmitTestAndCallCheckCid(assembler, jump_label, class_id_reg, range,
1906+
bias, jump_on_miss);
18921907
}
1893-
assembler->Bind(&fail);
18941908
}
18951909

18961910
void FlowGraphCompiler::GenerateAssertAssignableAOT(
@@ -1901,7 +1915,7 @@ void FlowGraphCompiler::GenerateAssertAssignableAOT(
19011915
const Register function_type_args_reg,
19021916
const Register subtype_cache_reg,
19031917
const Register dst_type_reg,
1904-
const Register dst_name_reg,
1918+
const Register scratch_reg,
19051919
Label* done) {
19061920
// If the int type is assignable to [dst_type] we special case it on the
19071921
// caller side!
@@ -1929,9 +1943,6 @@ void FlowGraphCompiler::GenerateAssertAssignableAOT(
19291943
if (hi != NULL) {
19301944
const Class& type_class = Class::Handle(zone(), dst_type.type_class());
19311945

1932-
// We can use [dst_name_reg], there is no overlap of use.
1933-
const Register scratch_reg = dst_name_reg;
1934-
19351946
bool used_cid_range_check = false;
19361947
const bool can_use_simple_cid_range_test =
19371948
hi->CanUseSubtypeRangeCheckFor(dst_type);
@@ -1953,10 +1964,6 @@ void FlowGraphCompiler::GenerateAssertAssignableAOT(
19531964
}
19541965
__ LoadObject(dst_type_reg, dst_type);
19551966
}
1956-
1957-
__ LoadObject(dst_name_reg, dst_name);
1958-
__ LoadObject(subtype_cache_reg,
1959-
SubtypeTestCache::ZoneHandle(zone(), SubtypeTestCache::New()));
19601967
}
19611968

19621969
#undef __

runtime/vm/compiler/backend/flow_graph_compiler.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -345,7 +345,7 @@ class FlowGraphCompiler : public ValueObject {
345345
const Register function_type_args_reg,
346346
const Register subtype_cache_reg,
347347
const Register dst_type_reg,
348-
const Register dst_name_reg,
348+
const Register scratch_reg,
349349
Label* done);
350350

351351
// DBC emits calls very differently from all other architectures due to its
@@ -424,11 +424,15 @@ class FlowGraphCompiler : public ValueObject {
424424
// of going into the subtyping cache)
425425
static const intptr_t kMaxNumberOfCidRangesToTest = 4;
426426

427-
// Falls through to false.
427+
// If [fall_through_if_inside] is `true`, then [outside_range_lbl] must be
428+
// supplied, since it will be jumped to in the last case if the cid is outside
429+
// the range.
428430
static void GenerateCidRangesCheck(Assembler* assembler,
429431
Register class_id_reg,
430432
const CidRangeVector& cid_ranges,
431-
Label* is_subtype_lbl);
433+
Label* inside_range_lbl,
434+
Label* outside_range_lbl = NULL,
435+
bool fall_through_if_inside = false);
432436

433437
void EmitOptimizedInstanceCall(const StubEntry& stub_entry,
434438
const ICData& ic_data,

runtime/vm/compiler/backend/flow_graph_compiler_arm.cc

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -717,17 +717,33 @@ void FlowGraphCompiler::GenerateAssertAssignableAOT(
717717

718718
const Register kSubtypeTestCacheReg = R3;
719719
const Register kDstTypeReg = R8;
720-
const Register kDstNameReg = R4;
720+
const Register kScratchReg = R4;
721721

722722
Label done;
723723

724724
GenerateAssertAssignableAOT(dst_type, dst_name, kInstanceReg,
725725
kInstantiatorTypeArgumentsReg,
726726
kFunctionTypeArgumentsReg, kSubtypeTestCacheReg,
727-
kDstTypeReg, kDstNameReg, &done);
727+
kDstTypeReg, kScratchReg, &done);
728+
// We use 2 consecutive entries in the pool for the subtype cache and the
729+
// destination name. The second entry, namely [dst_name] seems to be unused,
730+
// but it will be used by the code throwing a TypeError if the type test fails
731+
// (see runtime/vm/runtime_entry.cc:TypeCheck). It will use pattern matching
732+
// on the call site to find out at which pool index the destination name is
733+
// located.
734+
const intptr_t sub_type_cache_index = __ object_pool_wrapper().AddObject(
735+
Object::null_object(), Patchability::kPatchable);
736+
const intptr_t sub_type_cache_offset =
737+
ObjectPool::element_offset(sub_type_cache_index) - kHeapObjectTag;
738+
const intptr_t dst_name_index =
739+
__ object_pool_wrapper().AddObject(dst_name, Patchability::kPatchable);
740+
ASSERT((sub_type_cache_index + 1) == dst_name_index);
741+
ASSERT(__ constant_pool_allowed());
742+
728743
__ LoadField(R9,
729744
FieldAddress(kDstTypeReg,
730745
AbstractType::type_test_stub_entry_point_offset()));
746+
__ ldr(kSubtypeTestCacheReg, Address(PP, sub_type_cache_offset));
731747
__ blx(R9);
732748
EmitCallsiteMetadata(token_pos, deopt_id, RawPcDescriptors::kOther, locs);
733749
__ Bind(&done);
@@ -1233,13 +1249,14 @@ int FlowGraphCompiler::EmitTestAndCallCheckCid(Assembler* assembler,
12331249
bool jump_on_miss) {
12341250
intptr_t cid_start = range.cid_start;
12351251
if (range.IsSingleCid()) {
1236-
__ CompareImmediate(class_id_reg, cid_start - bias);
1237-
__ b(label, jump_on_miss ? NE : EQ);
1252+
__ AddImmediateSetFlags(class_id_reg, class_id_reg, bias - cid_start);
1253+
__ BranchIf(jump_on_miss ? NOT_ZERO : ZERO, label);
1254+
bias = cid_start;
12381255
} else {
12391256
__ AddImmediate(class_id_reg, class_id_reg, bias - cid_start);
1240-
bias = cid_start;
12411257
__ CompareImmediate(class_id_reg, range.Extent());
1242-
__ b(label, jump_on_miss ? HI : LS); // Unsigned higher.
1258+
__ BranchIf(jump_on_miss ? UNSIGNED_GREATER : UNSIGNED_LESS_EQUAL, label);
1259+
bias = cid_start;
12431260
}
12441261
return bias;
12451262
}

runtime/vm/compiler/backend/flow_graph_compiler_arm64.cc

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -694,18 +694,34 @@ void FlowGraphCompiler::GenerateAssertAssignableAOT(
694694

695695
const Register kSubtypeTestCacheReg = R3;
696696
const Register kDstTypeReg = R8;
697-
const Register kDstNameReg = R4;
697+
const Register kScratchReg = R4;
698698

699699
Label done;
700700

701701
GenerateAssertAssignableAOT(dst_type, dst_name, kInstanceReg,
702702
kInstantiatorTypeArgumentsReg,
703703
kFunctionTypeArgumentsReg, kSubtypeTestCacheReg,
704-
kDstTypeReg, kDstNameReg, &done);
704+
kDstTypeReg, kScratchReg, &done);
705+
706+
// We use 2 consecutive entries in the pool for the subtype cache and the
707+
// destination name. The second entry, namely [dst_name] seems to be unused,
708+
// but it will be used by the code throwing a TypeError if the type test fails
709+
// (see runtime/vm/runtime_entry.cc:TypeCheck). It will use pattern matching
710+
// on the call site to find out at which pool index the destination name is
711+
// located.
712+
const intptr_t sub_type_cache_index = __ object_pool_wrapper().AddObject(
713+
Object::null_object(), Patchability::kPatchable);
714+
const intptr_t sub_type_cache_offset =
715+
ObjectPool::element_offset(sub_type_cache_index);
716+
const intptr_t dst_name_index =
717+
__ object_pool_wrapper().AddObject(dst_name, Patchability::kPatchable);
718+
ASSERT((sub_type_cache_index + 1) == dst_name_index);
719+
ASSERT(__ constant_pool_allowed());
705720

706721
__ LoadField(R9,
707722
FieldAddress(kDstTypeReg,
708723
AbstractType::type_test_stub_entry_point_offset()));
724+
__ ldr(kSubtypeTestCacheReg, Address(PP, sub_type_cache_offset));
709725
__ blr(R9);
710726
EmitCallsiteMetadata(token_pos, deopt_id, RawPcDescriptors::kOther, locs);
711727
__ Bind(&done);
@@ -1190,13 +1206,14 @@ int FlowGraphCompiler::EmitTestAndCallCheckCid(Assembler* assembler,
11901206
bool jump_on_miss) {
11911207
intptr_t cid_start = range.cid_start;
11921208
if (range.IsSingleCid()) {
1193-
__ CompareImmediate(class_id_reg, cid_start - bias);
1194-
__ b(label, jump_on_miss ? NE : EQ);
1209+
__ AddImmediateSetFlags(class_id_reg, class_id_reg, bias - cid_start);
1210+
__ BranchIf(jump_on_miss ? NOT_EQUAL : EQUAL, label);
1211+
bias = cid_start;
11951212
} else {
11961213
__ AddImmediate(class_id_reg, bias - cid_start);
11971214
bias = cid_start;
11981215
__ CompareImmediate(class_id_reg, range.Extent());
1199-
__ b(label, jump_on_miss ? HI : LS); // Unsigned higher / less-or-equal.
1216+
__ BranchIf(jump_on_miss ? UNSIGNED_GREATER : UNSIGNED_LESS_EQUAL, label);
12001217
}
12011218
return bias;
12021219
}

runtime/vm/compiler/backend/flow_graph_compiler_x64.cc

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -702,14 +702,30 @@ void FlowGraphCompiler::GenerateAssertAssignableAOT(
702702
Label done;
703703

704704
const Register subtype_cache_reg = R9;
705-
const Register dst_type_reg = RBX;
706-
const Register dst_name_reg = R10;
705+
const Register kScratchReg = RBX;
707706

708707
GenerateAssertAssignableAOT(dst_type, dst_name, kInstanceReg,
709708
kInstantiatorTypeArgumentsReg,
710709
kFunctionTypeArgumentsReg, subtype_cache_reg,
711-
dst_type_reg, dst_name_reg, &done);
712-
710+
kScratchReg, kScratchReg, &done);
711+
712+
// We use 2 consecutive entries in the pool for the subtype cache and the
713+
// destination name. The second entry, namely [dst_name] seems to be unused,
714+
// but it will be used by the code throwing a TypeError if the type test fails
715+
// (see runtime/vm/runtime_entry.cc:TypeCheck). It will use pattern matching
716+
// on the call site to find out at which pool index the destination name is
717+
// located.
718+
const intptr_t sub_type_cache_index = __ object_pool_wrapper().AddObject(
719+
Object::null_object(), Patchability::kPatchable);
720+
const intptr_t sub_type_cache_offset =
721+
ObjectPool::element_offset(sub_type_cache_index) - kHeapObjectTag;
722+
const intptr_t dst_name_index =
723+
__ object_pool_wrapper().AddObject(dst_name, Patchability::kPatchable);
724+
ASSERT((sub_type_cache_index + 1) == dst_name_index);
725+
ASSERT(__ constant_pool_allowed());
726+
727+
__ movq(subtype_cache_reg,
728+
Address::AddressBaseImm32(PP, sub_type_cache_offset));
713729
__ call(FieldAddress(RBX, AbstractType::type_test_stub_entry_point_offset()));
714730
EmitCallsiteMetadata(token_pos, deopt_id, RawPcDescriptors::kOther, locs);
715731
__ Bind(&done);
@@ -1151,14 +1167,13 @@ int FlowGraphCompiler::EmitTestAndCallCheckCid(Assembler* assembler,
11511167
bool jump_on_miss) {
11521168
intptr_t cid_start = range.cid_start;
11531169
if (range.IsSingleCid()) {
1154-
__ cmpl(class_id_reg, Immediate(cid_start - bias));
1155-
__ j(jump_on_miss ? NOT_EQUAL : EQUAL, label);
1170+
__ CompareImmediate(class_id_reg, cid_start - bias);
1171+
__ BranchIf(jump_on_miss ? NOT_EQUAL : EQUAL, label);
11561172
} else {
11571173
__ addl(class_id_reg, Immediate(bias - cid_start));
11581174
bias = cid_start;
11591175
__ cmpl(class_id_reg, Immediate(range.Extent()));
1160-
__ j(jump_on_miss ? ABOVE : BELOW_EQUAL,
1161-
label); // Unsigned higher / lower-or-equal.
1176+
__ BranchIf(jump_on_miss ? UNSIGNED_GREATER : UNSIGNED_LESS_EQUAL, label);
11621177
}
11631178
return bias;
11641179
}

runtime/vm/compiler/backend/il.cc

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,18 @@ bool HierarchyInfo::CanUseSubtypeRangeCheckFor(const AbstractType& type) {
151151
Zone* zone = thread_->zone();
152152
const Class& type_class = Class::Handle(zone, type.type_class());
153153

154+
// The FutureOr<T> type cannot be handled by checking whether the instance is
155+
// a subtype of FutureOr and then checking whether the type argument `T`
156+
// matches.
157+
//
158+
// Instead we would need to perform multiple checks:
159+
//
160+
// instance is Null || instance is T || instance is Future<T>
161+
//
162+
if (type_class.IsFutureOrClass()) {
163+
return false;
164+
}
165+
154166
// We can use class id range checks only if we don't have to test type
155167
// arguments.
156168
//
@@ -187,6 +199,18 @@ bool HierarchyInfo::CanUseGenericSubtypeRangeCheckFor(
187199
const intptr_t num_type_parameters = type_class.NumTypeParameters();
188200
const intptr_t num_type_arguments = type_class.NumTypeArguments();
189201

202+
// The FutureOr<T> type cannot be handled by checking whether the instance is
203+
// a subtype of FutureOr and then checking whether the type argument `T`
204+
// matches.
205+
//
206+
// Instead we would need to perform multiple checks:
207+
//
208+
// instance is Null || instance is T || instance is Future<T>
209+
//
210+
if (type_class.IsFutureOrClass()) {
211+
return false;
212+
}
213+
190214
// This function should only be called for generic classes.
191215
ASSERT(type_class.NumTypeParameters() > 0 &&
192216
type.arguments() != TypeArguments::null());
@@ -212,7 +236,9 @@ bool HierarchyInfo::CanUseGenericSubtypeRangeCheckFor(
212236
AbstractType& type_arg = AbstractType::Handle(zone);
213237
for (intptr_t i = 0; i < num_type_parameters; ++i) {
214238
type_arg = ta.TypeAt(num_type_arguments - num_type_parameters + i);
215-
if (!CanUseSubtypeRangeCheckFor(type_arg) && !type_arg.IsTypeParameter()) {
239+
// NOTE: We can handle type parameters but it increases size by a
240+
// significant amount.
241+
if (!CanUseSubtypeRangeCheckFor(type_arg)) {
216242
return false;
217243
}
218244
}

0 commit comments

Comments (0)