Skip to content

Commit 959f21a

Browse files
sstrickl authored and Commit Queue committed
[vm/compiler] Use a block size of 16 bytes for ARM64 MemoryCopy.
TEST=vm/cc/IRTest_Memory, co19{,_2}/LibTest/typed_data, lib{,_2}/typed_data, corelib{,_2}/list_test

Issue: #42072
Cq-Include-Trybots: luci.dart.try:vm-mac-debug-arm64-try,vm-aot-linux-release-simarm64-try,vm-aot-mac-release-arm64-try,vm-linux-release-simarm64-try,vm-mac-release-arm64-try,vm-aot-android-release-arm64c-try,vm-ffi-android-debug-arm64c-try,vm-ffi-qemu-linux-release-arm-try,vm-ffi-qemu-linux-release-riscv64-try,vm-linux-debug-simriscv64-try,vm-linux-release-simarm-try
Change-Id: Ife645a1d09be862d74e198162b124e657878280a
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/324683
Commit-Queue: Tess Strickland <[email protected]>
Reviewed-by: Alexander Markov <[email protected]>
1 parent 00d883f commit 959f21a

File tree

3 files changed: 42 additions and 59 deletions.

runtime/vm/compiler/backend/il_arm.cc

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -271,21 +271,22 @@ static void CopyUpToWordMultiple(FlowGraphCompiler* compiler,
271271
ASSERT(Utils::IsPowerOfTwo(element_size));
272272
if (element_size >= compiler::target::kWordSize) return;
273273

274-
const intptr_t base_shift = (unboxed_inputs ? 0 : kSmiTagShift) -
275-
Utils::ShiftForPowerOfTwo(element_size);
274+
const intptr_t element_shift = Utils::ShiftForPowerOfTwo(element_size);
275+
const intptr_t base_shift =
276+
(unboxed_inputs ? 0 : kSmiTagShift) - element_shift;
276277
auto const mode =
277278
reversed ? compiler::Address::NegPreIndex : compiler::Address::PostIndex;
278279
intptr_t tested_bits = 0;
279280

280281
__ Comment("Copying until region is a multiple of word size");
281282

282-
for (intptr_t bit = compiler::target::kWordSizeLog2 - 1; bit >= 0; bit--) {
283+
for (intptr_t bit = compiler::target::kWordSizeLog2 - 1; bit >= element_shift;
284+
bit--) {
283285
const intptr_t bytes = 1 << bit;
284-
if (element_size > bytes) continue;
285-
auto const sz = OperandSizeFor(bytes);
286286
const intptr_t tested_bit = bit + base_shift;
287287
tested_bits |= (1 << tested_bit);
288288
__ tst(length_reg, compiler::Operand(1 << tested_bit));
289+
auto const sz = OperandSizeFor(bytes);
289290
__ LoadFromOffset(TMP, compiler::Address(src_reg, bytes, mode), sz,
290291
NOT_ZERO);
291292
__ StoreToOffset(TMP, compiler::Address(dest_reg, bytes, mode), sz,

runtime/vm/compiler/backend/il_arm64.cc

Lines changed: 31 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -192,35 +192,37 @@ static compiler::OperandSize OperandSizeFor(intptr_t bytes) {
192192
}
193193
}
194194

195-
static void CopyUpToWordMultiple(FlowGraphCompiler* compiler,
196-
Register dest_reg,
197-
Register src_reg,
198-
Register length_reg,
199-
intptr_t element_size,
200-
bool unboxed_inputs,
201-
bool reversed,
202-
compiler::Label* done) {
195+
static void CopyUpToMultipleOfChunkSize(FlowGraphCompiler* compiler,
196+
Register dest_reg,
197+
Register src_reg,
198+
Register length_reg,
199+
intptr_t element_size,
200+
bool unboxed_inputs,
201+
bool reversed,
202+
intptr_t chunk_size,
203+
compiler::Label* done) {
203204
ASSERT(Utils::IsPowerOfTwo(element_size));
204-
if (element_size >= compiler::target::kWordSize) return;
205+
if (element_size >= chunk_size) return;
205206

206-
const intptr_t base_shift = (unboxed_inputs ? 0 : kSmiTagShift) -
207-
Utils::ShiftForPowerOfTwo(element_size);
207+
const intptr_t element_shift = Utils::ShiftForPowerOfTwo(element_size);
208+
const intptr_t base_shift =
209+
(unboxed_inputs ? 0 : kSmiTagShift) - element_shift;
208210
const intptr_t offset_sign = reversed ? -1 : 1;
209211
auto const mode =
210212
reversed ? compiler::Address::PreIndex : compiler::Address::PostIndex;
211213
intptr_t tested_bits = 0;
212214

213-
__ Comment("Copying until region is a multiple of word size");
215+
__ Comment("Copying until region size is a multiple of chunk size");
214216

215-
for (intptr_t bit = compiler::target::kWordSizeLog2 - 1; bit >= 0; bit--) {
217+
for (intptr_t bit = Utils::ShiftForPowerOfTwo(chunk_size) - 1;
218+
bit >= element_shift; bit--) {
216219
const intptr_t bytes = 1 << bit;
217-
if (element_size > bytes) continue;
218-
auto const sz = OperandSizeFor(bytes);
219220
const intptr_t tested_bit = bit + base_shift;
220221
tested_bits |= (1 << tested_bit);
221222
const intptr_t offset = offset_sign * bytes;
222223
compiler::Label skip_copy;
223224
__ tbz(&skip_copy, length_reg, tested_bit);
225+
auto const sz = OperandSizeFor(bytes);
224226
__ ldr(TMP, compiler::Address(src_reg, offset, mode), sz);
225227
__ str(TMP, compiler::Address(dest_reg, offset, mode), sz);
226228
__ Bind(&skip_copy);
@@ -262,45 +264,24 @@ void MemoryCopyInstr::EmitLoopCopy(FlowGraphCompiler* compiler,
262264
__ add(dest_reg, dest_reg, compiler::Operand(length_reg, LSL, shift));
263265
}
264266
}
265-
CopyUpToWordMultiple(compiler, dest_reg, src_reg, length_reg, element_size_,
266-
unboxed_inputs_, reversed, done);
267+
const intptr_t kChunkSize = 16;
268+
ASSERT(kChunkSize >= element_size_);
269+
CopyUpToMultipleOfChunkSize(compiler, dest_reg, src_reg, length_reg,
270+
element_size_, unboxed_inputs_, reversed,
271+
kChunkSize, done);
272+
// The size of the uncopied region is now a multiple of the chunk size.
273+
const intptr_t loop_subtract = (kChunkSize / element_size_)
274+
<< (unboxed_inputs_ ? 0 : kSmiTagShift);
267275
// When reversed, the src and dest registers are adjusted to start with the
268276
// end addresses, so apply the negated offset prior to indexing.
269-
const intptr_t offset =
270-
(reversed ? -1 : 1) *
271-
Utils::Maximum<intptr_t>(compiler::target::kWordSize, element_size_);
272-
const auto mode = element_size_ == 16
273-
? (reversed ? compiler::Address::PairPreIndex
274-
: compiler::Address::PairPostIndex)
275-
: (reversed ? compiler::Address::PreIndex
276-
: compiler::Address::PostIndex);
277-
// The size of the uncopied region is a multiple of the word size, so now we
278-
// copy the rest by word (unless the element size is larger).
279-
const intptr_t loop_subtract =
280-
Utils::Maximum<intptr_t>(1, compiler::target::kWordSize / element_size_)
281-
<< (unboxed_inputs_ ? 0 : kSmiTagShift);
282-
const auto src_address = compiler::Address(src_reg, offset, mode);
283-
const auto dest_address = compiler::Address(dest_reg, offset, mode);
284-
__ Comment("Copying by multiples of word size");
277+
const intptr_t offset = (reversed ? -1 : 1) * kChunkSize;
278+
const auto mode = reversed ? compiler::Address::PairPreIndex
279+
: compiler::Address::PairPostIndex;
280+
__ Comment("Copying chunks at a time");
285281
compiler::Label loop;
286282
__ Bind(&loop);
287-
switch (element_size_) {
288-
// Fall through for the sizes smaller than compiler::target::kWordSize.
289-
case 1:
290-
case 2:
291-
case 4:
292-
case 8:
293-
__ ldr(TMP, src_address, compiler::kEightBytes);
294-
__ str(TMP, dest_address, compiler::kEightBytes);
295-
break;
296-
case 16:
297-
__ ldp(TMP, TMP2, src_address, compiler::kEightBytes);
298-
__ stp(TMP, TMP2, dest_address, compiler::kEightBytes);
299-
break;
300-
default:
301-
UNREACHABLE();
302-
break;
303-
}
283+
__ ldp(TMP, TMP2, compiler::Address(src_reg, offset, mode));
284+
__ stp(TMP, TMP2, compiler::Address(dest_reg, offset, mode));
304285
__ subs(length_reg, length_reg, compiler::Operand(loop_subtract),
305286
compiler::kObjectBytes);
306287
__ b(&loop, NOT_ZERO);

runtime/vm/compiler/backend/il_riscv.cc

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -292,17 +292,18 @@ static void CopyUpToWordMultiple(FlowGraphCompiler* compiler,
292292
ASSERT(Utils::IsPowerOfTwo(element_size));
293293
if (element_size >= compiler::target::kWordSize) return;
294294

295-
const intptr_t base_shift = (unboxed_inputs ? 0 : kSmiTagShift) -
296-
Utils::ShiftForPowerOfTwo(element_size);
295+
const intptr_t element_shift = Utils::ShiftForPowerOfTwo(element_size);
296+
const intptr_t base_shift =
297+
(unboxed_inputs ? 0 : kSmiTagShift) - element_shift;
297298
intptr_t tested_bits = 0;
298299

299300
__ Comment("Copying until region is a multiple of word size");
300301

301302
COMPILE_ASSERT(XLEN <= 128);
302303

303-
for (intptr_t bit = compiler::target::kWordSizeLog2 - 1; bit >= 0; bit--) {
304+
for (intptr_t bit = compiler::target::kWordSizeLog2 - 1; bit >= element_shift;
305+
bit--) {
304306
const intptr_t bytes = 1 << bit;
305-
if (element_size > bytes) continue;
306307
const intptr_t tested_bit = bit + base_shift;
307308
tested_bits |= 1 << tested_bit;
308309
compiler::Label skip_copy;

0 commit comments

Comments
 (0)