@@ -192,35 +192,37 @@ static compiler::OperandSize OperandSizeFor(intptr_t bytes) {
192
192
}
193
193
}
194
194
195
- static void CopyUpToWordMultiple (FlowGraphCompiler* compiler,
196
- Register dest_reg,
197
- Register src_reg,
198
- Register length_reg,
199
- intptr_t element_size,
200
- bool unboxed_inputs,
201
- bool reversed,
202
- compiler::Label* done) {
195
+ static void CopyUpToMultipleOfChunkSize (FlowGraphCompiler* compiler,
196
+ Register dest_reg,
197
+ Register src_reg,
198
+ Register length_reg,
199
+ intptr_t element_size,
200
+ bool unboxed_inputs,
201
+ bool reversed,
202
+ intptr_t chunk_size,
203
+ compiler::Label* done) {
203
204
ASSERT (Utils::IsPowerOfTwo (element_size));
204
- if (element_size >= compiler::target:: kWordSize ) return ;
205
+ if (element_size >= chunk_size ) return ;
205
206
206
- const intptr_t base_shift = (unboxed_inputs ? 0 : kSmiTagShift ) -
207
- Utils::ShiftForPowerOfTwo (element_size);
207
+ const intptr_t element_shift = Utils::ShiftForPowerOfTwo (element_size);
208
+ const intptr_t base_shift =
209
+ (unboxed_inputs ? 0 : kSmiTagShift ) - element_shift;
208
210
const intptr_t offset_sign = reversed ? -1 : 1 ;
209
211
auto const mode =
210
212
reversed ? compiler::Address::PreIndex : compiler::Address::PostIndex;
211
213
intptr_t tested_bits = 0 ;
212
214
213
- __ Comment (" Copying until region is a multiple of word size" );
215
+ __ Comment (" Copying until region size is a multiple of chunk size" );
214
216
215
- for (intptr_t bit = compiler::target::kWordSizeLog2 - 1 ; bit >= 0 ; bit--) {
217
+ for (intptr_t bit = Utils::ShiftForPowerOfTwo (chunk_size) - 1 ;
218
+ bit >= element_shift; bit--) {
216
219
const intptr_t bytes = 1 << bit;
217
- if (element_size > bytes) continue ;
218
- auto const sz = OperandSizeFor (bytes);
219
220
const intptr_t tested_bit = bit + base_shift;
220
221
tested_bits |= (1 << tested_bit);
221
222
const intptr_t offset = offset_sign * bytes;
222
223
compiler::Label skip_copy;
223
224
__ tbz (&skip_copy, length_reg, tested_bit);
225
+ auto const sz = OperandSizeFor (bytes);
224
226
__ ldr (TMP, compiler::Address (src_reg, offset, mode), sz);
225
227
__ str (TMP, compiler::Address (dest_reg, offset, mode), sz);
226
228
__ Bind (&skip_copy);
@@ -262,45 +264,24 @@ void MemoryCopyInstr::EmitLoopCopy(FlowGraphCompiler* compiler,
262
264
__ add (dest_reg, dest_reg, compiler::Operand (length_reg, LSL, shift));
263
265
}
264
266
}
265
- CopyUpToWordMultiple (compiler, dest_reg, src_reg, length_reg, element_size_,
266
- unboxed_inputs_, reversed, done);
267
+ const intptr_t kChunkSize = 16 ;
268
+ ASSERT (kChunkSize >= element_size_);
269
+ CopyUpToMultipleOfChunkSize (compiler, dest_reg, src_reg, length_reg,
270
+ element_size_, unboxed_inputs_, reversed,
271
+ kChunkSize , done);
272
+ // The size of the uncopied region is now a multiple of the chunk size.
273
+ const intptr_t loop_subtract = (kChunkSize / element_size_)
274
+ << (unboxed_inputs_ ? 0 : kSmiTagShift );
267
275
// When reversed, the src and dest registers are adjusted to start with the
268
276
// end addresses, so apply the negated offset prior to indexing.
269
- const intptr_t offset =
270
- (reversed ? -1 : 1 ) *
271
- Utils::Maximum<intptr_t >(compiler::target::kWordSize , element_size_);
272
- const auto mode = element_size_ == 16
273
- ? (reversed ? compiler::Address::PairPreIndex
274
- : compiler::Address::PairPostIndex)
275
- : (reversed ? compiler::Address::PreIndex
276
- : compiler::Address::PostIndex);
277
- // The size of the uncopied region is a multiple of the word size, so now we
278
- // copy the rest by word (unless the element size is larger).
279
- const intptr_t loop_subtract =
280
- Utils::Maximum<intptr_t >(1 , compiler::target::kWordSize / element_size_)
281
- << (unboxed_inputs_ ? 0 : kSmiTagShift );
282
- const auto src_address = compiler::Address (src_reg, offset, mode);
283
- const auto dest_address = compiler::Address (dest_reg, offset, mode);
284
- __ Comment (" Copying by multiples of word size" );
277
+ const intptr_t offset = (reversed ? -1 : 1 ) * kChunkSize ;
278
+ const auto mode = reversed ? compiler::Address::PairPreIndex
279
+ : compiler::Address::PairPostIndex;
280
+ __ Comment (" Copying chunks at a time" );
285
281
compiler::Label loop;
286
282
__ Bind (&loop);
287
- switch (element_size_) {
288
- // Fall through for the sizes smaller than compiler::target::kWordSize.
289
- case 1 :
290
- case 2 :
291
- case 4 :
292
- case 8 :
293
- __ ldr (TMP, src_address, compiler::kEightBytes );
294
- __ str (TMP, dest_address, compiler::kEightBytes );
295
- break ;
296
- case 16 :
297
- __ ldp (TMP, TMP2, src_address, compiler::kEightBytes );
298
- __ stp (TMP, TMP2, dest_address, compiler::kEightBytes );
299
- break ;
300
- default :
301
- UNREACHABLE ();
302
- break ;
303
- }
283
+ __ ldp (TMP, TMP2, compiler::Address (src_reg, offset, mode));
284
+ __ stp (TMP, TMP2, compiler::Address (dest_reg, offset, mode));
304
285
__ subs (length_reg, length_reg, compiler::Operand (loop_subtract),
305
286
compiler::kObjectBytes );
306
287
__ b (&loop, NOT_ZERO);
0 commit comments