Skip to content

Commit 1aa7a93

Browse files
dcharkes authored and Commit Queue committed
[vm] MemoryCopyInstr optimize constant src and dest start
When constants are passed in as the source and destination start indices, no registers are needed for them. The constants are compiled directly into the machine code. If the constant happens to be 0, no machine code is emitted at all.

I did not measure any speed improvements. Likely the micro-code schedulers in the CPUs already notice the no-ops. I have verified manually that we emit smaller machine code with these changes on x64.

TEST=runtime/vm/compiler/backend/memory_copy_test.cc

Bug: #51031
Change-Id: I70f12c9ae299b44a8f5007ca3a8c5ee56a9aff40
Cq-Include-Trybots: luci.dart.try:vm-precomp-ffi-qemu-linux-release-riscv64-try,vm-precomp-ffi-qemu-linux-release-arm-try,vm-ffi-android-debug-arm64c-try,vm-ffi-android-debug-arm-try,vm-kernel-nnbd-mac-debug-arm64-try,vm-kernel-nnbd-win-debug-x64-try,vm-kernel-win-debug-x64c-try,vm-kernel-win-debug-ia32-try,vm-kernel-nnbd-linux-debug-ia32-try,vm-reload-rollback-linux-debug-x64-try
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/279170
Reviewed-by: Alexander Markov <[email protected]>
1 parent bf7d643 commit 1aa7a93

File tree

6 files changed

+117
-76
lines changed

6 files changed

+117
-76
lines changed

runtime/vm/compiler/backend/il.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3085,12 +3085,11 @@ class MemoryCopyInstr : public TemplateInstruction<5, NoThrow> {
30853085

30863086
private:
30873087
// Set array_reg to point to the index indicated by start (contained in
3088-
// start_reg) of the typed data or string in array (contained in array_reg).
3088+
// start_loc) of the typed data or string in array (contained in array_reg).
30893089
void EmitComputeStartPointer(FlowGraphCompiler* compiler,
30903090
classid_t array_cid,
3091-
Value* start,
30923091
Register array_reg,
3093-
Register start_reg);
3092+
Location start_loc);
30943093

30953094
static bool IsArrayTypeSupported(classid_t array_cid) {
30963095
if (IsTypedDataBaseClassId(array_cid)) {

runtime/vm/compiler/backend/il_arm.cc

Lines changed: 26 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -165,8 +165,8 @@ LocationSummary* MemoryCopyInstr::MakeLocationSummary(Zone* zone,
165165
LocationSummary(zone, kNumInputs, kNumTemps, LocationSummary::kNoCall);
166166
locs->set_in(kSrcPos, Location::WritableRegister());
167167
locs->set_in(kDestPos, Location::WritableRegister());
168-
locs->set_in(kSrcStartPos, Location::RequiresRegister());
169-
locs->set_in(kDestStartPos, Location::RequiresRegister());
168+
locs->set_in(kSrcStartPos, LocationRegisterOrConstant(src_start()));
169+
locs->set_in(kDestStartPos, LocationRegisterOrConstant(dest_start()));
170170
locs->set_in(kLengthPos, Location::WritableRegister());
171171
for (intptr_t i = 0; i < kNumTemps; i++) {
172172
locs->set_temp(i, Location::RequiresRegister());
@@ -177,8 +177,8 @@ LocationSummary* MemoryCopyInstr::MakeLocationSummary(Zone* zone,
177177
void MemoryCopyInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
178178
const Register src_reg = locs()->in(kSrcPos).reg();
179179
const Register dest_reg = locs()->in(kDestPos).reg();
180-
const Register src_start_reg = locs()->in(kSrcStartPos).reg();
181-
const Register dest_start_reg = locs()->in(kDestStartPos).reg();
180+
const Location src_start_loc = locs()->in(kSrcStartPos);
181+
const Location dest_start_loc = locs()->in(kDestStartPos);
182182
const Register length_reg = locs()->in(kLengthPos).reg();
183183

184184
const Register temp_reg = locs()->temp(0).reg();
@@ -187,10 +187,8 @@ void MemoryCopyInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
187187
temp_regs |= 1 << locs()->temp(i).reg();
188188
}
189189

190-
EmitComputeStartPointer(compiler, src_cid_, src_start(), src_reg,
191-
src_start_reg);
192-
EmitComputeStartPointer(compiler, dest_cid_, dest_start(), dest_reg,
193-
dest_start_reg);
190+
EmitComputeStartPointer(compiler, src_cid_, src_reg, src_start_loc);
191+
EmitComputeStartPointer(compiler, dest_cid_, dest_reg, dest_start_loc);
194192

195193
compiler::Label loop, done;
196194

@@ -230,44 +228,54 @@ void MemoryCopyInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
230228

231229
void MemoryCopyInstr::EmitComputeStartPointer(FlowGraphCompiler* compiler,
232230
classid_t array_cid,
233-
Value* start,
234231
Register array_reg,
235-
Register start_reg) {
232+
Location start_loc) {
233+
intptr_t offset;
236234
if (IsTypedDataBaseClassId(array_cid)) {
237235
__ ldr(array_reg,
238236
compiler::FieldAddress(
239237
array_reg, compiler::target::PointerBase::data_offset()));
238+
offset = 0;
240239
} else {
241240
switch (array_cid) {
242241
case kOneByteStringCid:
243-
__ add(
244-
array_reg, array_reg,
245-
compiler::Operand(compiler::target::OneByteString::data_offset() -
246-
kHeapObjectTag));
242+
offset =
243+
compiler::target::OneByteString::data_offset() - kHeapObjectTag;
247244
break;
248245
case kTwoByteStringCid:
249-
__ add(
250-
array_reg, array_reg,
251-
compiler::Operand(compiler::target::OneByteString::data_offset() -
252-
kHeapObjectTag));
246+
offset =
247+
compiler::target::TwoByteString::data_offset() - kHeapObjectTag;
253248
break;
254249
case kExternalOneByteStringCid:
255250
__ ldr(array_reg,
256251
compiler::FieldAddress(array_reg,
257252
compiler::target::ExternalOneByteString::
258253
external_data_offset()));
254+
offset = 0;
259255
break;
260256
case kExternalTwoByteStringCid:
261257
__ ldr(array_reg,
262258
compiler::FieldAddress(array_reg,
263259
compiler::target::ExternalTwoByteString::
264260
external_data_offset()));
261+
offset = 0;
265262
break;
266263
default:
267264
UNREACHABLE();
268265
break;
269266
}
270267
}
268+
ASSERT(start_loc.IsRegister() || start_loc.IsConstant());
269+
if (start_loc.IsConstant()) {
270+
const auto& constant = start_loc.constant();
271+
ASSERT(constant.IsInteger());
272+
const int64_t start_value = Integer::Cast(constant).AsInt64Value();
273+
const intptr_t add_value = start_value * element_size_ + offset;
274+
__ AddImmediate(array_reg, add_value);
275+
return;
276+
}
277+
__ AddImmediate(array_reg, offset);
278+
const Register start_reg = start_loc.reg();
271279
intptr_t shift = Utils::ShiftForPowerOfTwo(element_size_) - 1;
272280
if (shift < 0) {
273281
__ add(array_reg, array_reg, compiler::Operand(start_reg, ASR, -shift));

runtime/vm/compiler/backend/il_arm64.cc

Lines changed: 26 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -162,8 +162,8 @@ LocationSummary* MemoryCopyInstr::MakeLocationSummary(Zone* zone,
162162
LocationSummary(zone, kNumInputs, kNumTemps, LocationSummary::kNoCall);
163163
locs->set_in(kSrcPos, Location::WritableRegister());
164164
locs->set_in(kDestPos, Location::WritableRegister());
165-
locs->set_in(kSrcStartPos, Location::RequiresRegister());
166-
locs->set_in(kDestStartPos, Location::RequiresRegister());
165+
locs->set_in(kSrcStartPos, LocationRegisterOrConstant(src_start()));
166+
locs->set_in(kDestStartPos, LocationRegisterOrConstant(dest_start()));
167167
locs->set_in(kLengthPos, Location::WritableRegister());
168168
locs->set_temp(0, element_size_ == 16
169169
? Location::Pair(Location::RequiresRegister(),
@@ -175,8 +175,8 @@ LocationSummary* MemoryCopyInstr::MakeLocationSummary(Zone* zone,
175175
void MemoryCopyInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
176176
const Register src_reg = locs()->in(kSrcPos).reg();
177177
const Register dest_reg = locs()->in(kDestPos).reg();
178-
const Register src_start_reg = locs()->in(kSrcStartPos).reg();
179-
const Register dest_start_reg = locs()->in(kDestStartPos).reg();
178+
const Location src_start_loc = locs()->in(kSrcStartPos);
179+
const Location dest_start_loc = locs()->in(kDestStartPos);
180180
const Register length_reg = locs()->in(kLengthPos).reg();
181181

182182
Register temp_reg, temp_reg2;
@@ -189,10 +189,8 @@ void MemoryCopyInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
189189
temp_reg2 = kNoRegister;
190190
}
191191

192-
EmitComputeStartPointer(compiler, src_cid_, src_start(), src_reg,
193-
src_start_reg);
194-
EmitComputeStartPointer(compiler, dest_cid_, dest_start(), dest_reg,
195-
dest_start_reg);
192+
EmitComputeStartPointer(compiler, src_cid_, src_reg, src_start_loc);
193+
EmitComputeStartPointer(compiler, dest_cid_, dest_reg, dest_start_loc);
196194

197195
compiler::Label loop, done;
198196

@@ -236,44 +234,54 @@ void MemoryCopyInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
236234

237235
void MemoryCopyInstr::EmitComputeStartPointer(FlowGraphCompiler* compiler,
238236
classid_t array_cid,
239-
Value* start,
240237
Register array_reg,
241-
Register start_reg) {
238+
Location start_loc) {
239+
intptr_t offset;
242240
if (IsTypedDataBaseClassId(array_cid)) {
243241
__ ldr(array_reg,
244242
compiler::FieldAddress(
245243
array_reg, compiler::target::PointerBase::data_offset()));
244+
offset = 0;
246245
} else {
247246
switch (array_cid) {
248247
case kOneByteStringCid:
249-
__ add(
250-
array_reg, array_reg,
251-
compiler::Operand(compiler::target::OneByteString::data_offset() -
252-
kHeapObjectTag));
248+
offset =
249+
compiler::target::OneByteString::data_offset() - kHeapObjectTag;
253250
break;
254251
case kTwoByteStringCid:
255-
__ add(
256-
array_reg, array_reg,
257-
compiler::Operand(compiler::target::OneByteString::data_offset() -
258-
kHeapObjectTag));
252+
offset =
253+
compiler::target::TwoByteString::data_offset() - kHeapObjectTag;
259254
break;
260255
case kExternalOneByteStringCid:
261256
__ ldr(array_reg,
262257
compiler::FieldAddress(array_reg,
263258
compiler::target::ExternalOneByteString::
264259
external_data_offset()));
260+
offset = 0;
265261
break;
266262
case kExternalTwoByteStringCid:
267263
__ ldr(array_reg,
268264
compiler::FieldAddress(array_reg,
269265
compiler::target::ExternalTwoByteString::
270266
external_data_offset()));
267+
offset = 0;
271268
break;
272269
default:
273270
UNREACHABLE();
274271
break;
275272
}
276273
}
274+
ASSERT(start_loc.IsRegister() || start_loc.IsConstant());
275+
if (start_loc.IsConstant()) {
276+
const auto& constant = start_loc.constant();
277+
ASSERT(constant.IsInteger());
278+
const int64_t start_value = Integer::Cast(constant).AsInt64Value();
279+
const intptr_t add_value = start_value * element_size_ + offset;
280+
__ AddImmediate(array_reg, add_value);
281+
return;
282+
}
283+
__ AddImmediate(array_reg, offset);
284+
const Register start_reg = start_loc.reg();
277285
intptr_t shift = Utils::ShiftForPowerOfTwo(element_size_) - 1;
278286
if (shift < 0) {
279287
#if defined(DART_COMPRESSED_POINTERS)

runtime/vm/compiler/backend/il_ia32.cc

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -85,24 +85,23 @@ LocationSummary* MemoryCopyInstr::MakeLocationSummary(Zone* zone,
8585
LocationSummary(zone, kNumInputs, kNumTemps, LocationSummary::kNoCall);
8686
locs->set_in(kSrcPos, Location::RequiresRegister());
8787
locs->set_in(kDestPos, Location::RegisterLocation(EDI));
88-
locs->set_in(kSrcStartPos, Location::WritableRegister());
89-
locs->set_in(kDestStartPos, Location::WritableRegister());
88+
locs->set_in(kSrcStartPos, LocationRegisterOrConstant(src_start()));
89+
locs->set_in(kDestStartPos, LocationRegisterOrConstant(dest_start()));
9090
locs->set_in(kLengthPos, Location::RegisterLocation(ECX));
9191
return locs;
9292
}
9393

9494
void MemoryCopyInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
9595
const Register src_reg = locs()->in(kSrcPos).reg();
96-
const Register src_start_reg = locs()->in(kSrcStartPos).reg();
97-
const Register dest_start_reg = locs()->in(kDestStartPos).reg();
96+
const Location src_start_loc = locs()->in(kSrcStartPos);
97+
const Location dest_start_loc = locs()->in(kDestStartPos);
9898

9999
// Save ESI which is THR.
100100
__ pushl(ESI);
101101
__ movl(ESI, src_reg);
102102

103-
EmitComputeStartPointer(compiler, src_cid_, src_start(), ESI, src_start_reg);
104-
EmitComputeStartPointer(compiler, dest_cid_, dest_start(), EDI,
105-
dest_start_reg);
103+
EmitComputeStartPointer(compiler, src_cid_, ESI, src_start_loc);
104+
EmitComputeStartPointer(compiler, dest_cid_, EDI, dest_start_loc);
106105
if (element_size_ <= 4) {
107106
__ SmiUntag(ECX);
108107
} else if (element_size_ == 16) {
@@ -128,9 +127,8 @@ void MemoryCopyInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
128127

129128
void MemoryCopyInstr::EmitComputeStartPointer(FlowGraphCompiler* compiler,
130129
classid_t array_cid,
131-
Value* start,
132130
Register array_reg,
133-
Register start_reg) {
131+
Location start_loc) {
134132
intptr_t offset;
135133
if (IsTypedDataBaseClassId(array_cid)) {
136134
__ movl(array_reg,
@@ -166,6 +164,16 @@ void MemoryCopyInstr::EmitComputeStartPointer(FlowGraphCompiler* compiler,
166164
break;
167165
}
168166
}
167+
ASSERT(start_loc.IsRegister() || start_loc.IsConstant());
168+
if (start_loc.IsConstant()) {
169+
const auto& constant = start_loc.constant();
170+
ASSERT(constant.IsInteger());
171+
const int64_t start_value = Integer::Cast(constant).AsInt64Value();
172+
const intptr_t add_value = start_value * element_size_ + offset;
173+
__ AddImmediate(array_reg, add_value);
174+
return;
175+
}
176+
const Register start_reg = start_loc.reg();
169177
ScaleFactor scale;
170178
switch (element_size_) {
171179
case 1:

runtime/vm/compiler/backend/il_riscv.cc

Lines changed: 26 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -179,23 +179,21 @@ LocationSummary* MemoryCopyInstr::MakeLocationSummary(Zone* zone,
179179
LocationSummary(zone, kNumInputs, kNumTemps, LocationSummary::kNoCall);
180180
locs->set_in(kSrcPos, Location::WritableRegister());
181181
locs->set_in(kDestPos, Location::WritableRegister());
182-
locs->set_in(kSrcStartPos, Location::RequiresRegister());
183-
locs->set_in(kDestStartPos, Location::RequiresRegister());
182+
locs->set_in(kSrcStartPos, LocationRegisterOrConstant(src_start()));
183+
locs->set_in(kDestStartPos, LocationRegisterOrConstant(dest_start()));
184184
locs->set_in(kLengthPos, Location::WritableRegister());
185185
return locs;
186186
}
187187

188188
void MemoryCopyInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
189189
const Register src_reg = locs()->in(kSrcPos).reg();
190190
const Register dest_reg = locs()->in(kDestPos).reg();
191-
const Register src_start_reg = locs()->in(kSrcStartPos).reg();
192-
const Register dest_start_reg = locs()->in(kDestStartPos).reg();
191+
const Location src_start_loc = locs()->in(kSrcStartPos);
192+
const Location dest_start_loc = locs()->in(kDestStartPos);
193193
const Register length_reg = locs()->in(kLengthPos).reg();
194194

195-
EmitComputeStartPointer(compiler, src_cid_, src_start(), src_reg,
196-
src_start_reg);
197-
EmitComputeStartPointer(compiler, dest_cid_, dest_start(), dest_reg,
198-
dest_start_reg);
195+
EmitComputeStartPointer(compiler, src_cid_, src_reg, src_start_loc);
196+
EmitComputeStartPointer(compiler, dest_cid_, dest_reg, dest_start_loc);
199197

200198
compiler::Label loop, done;
201199

@@ -272,42 +270,54 @@ void MemoryCopyInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
272270

273271
void MemoryCopyInstr::EmitComputeStartPointer(FlowGraphCompiler* compiler,
274272
classid_t array_cid,
275-
Value* start,
276273
Register array_reg,
277-
Register start_reg) {
274+
Location start_loc) {
275+
intptr_t offset;
278276
if (IsTypedDataBaseClassId(array_cid)) {
279277
__ lx(array_reg,
280278
compiler::FieldAddress(array_reg,
281279
compiler::target::PointerBase::data_offset()));
280+
offset = 0;
282281
} else {
283282
switch (array_cid) {
284283
case kOneByteStringCid:
285-
__ addi(
286-
array_reg, array_reg,
287-
compiler::target::OneByteString::data_offset() - kHeapObjectTag);
284+
offset =
285+
compiler::target::OneByteString::data_offset() - kHeapObjectTag;
288286
break;
289287
case kTwoByteStringCid:
290-
__ addi(
291-
array_reg, array_reg,
292-
compiler::target::OneByteString::data_offset() - kHeapObjectTag);
288+
offset =
289+
compiler::target::TwoByteString::data_offset() - kHeapObjectTag;
293290
break;
294291
case kExternalOneByteStringCid:
295292
__ lx(array_reg,
296293
compiler::FieldAddress(array_reg,
297294
compiler::target::ExternalOneByteString::
298295
external_data_offset()));
296+
offset = 0;
299297
break;
300298
case kExternalTwoByteStringCid:
301299
__ lx(array_reg,
302300
compiler::FieldAddress(array_reg,
303301
compiler::target::ExternalTwoByteString::
304302
external_data_offset()));
303+
offset = 0;
305304
break;
306305
default:
307306
UNREACHABLE();
308307
break;
309308
}
310309
}
310+
ASSERT(start_loc.IsRegister() || start_loc.IsConstant());
311+
if (start_loc.IsConstant()) {
312+
const auto& constant = start_loc.constant();
313+
ASSERT(constant.IsInteger());
314+
const int64_t start_value = Integer::Cast(constant).AsInt64Value();
315+
const intptr_t add_value = start_value * element_size_ + offset;
316+
__ AddImmediate(array_reg, add_value);
317+
return;
318+
}
319+
__ AddImmediate(array_reg, offset);
320+
const Register start_reg = start_loc.reg();
311321
intptr_t shift = Utils::ShiftForPowerOfTwo(element_size_) - 1;
312322
if (shift < 0) {
313323
__ srai(TMP, start_reg, -shift);

0 commit comments

Comments
 (0)