@@ -262,31 +262,29 @@ static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor,
262
262
// getVectorInterleaveFactor / getVectorDeinterleaveFactor. But TLI
263
263
// hooks (e.g. lowerInterleavedScalableLoad) expect ABCD, so we need
264
264
// to reorder them by interleaving these values.
265
- static void interleaveLeafValues (SmallVectorImpl<Value *> &Leaves) {
266
- unsigned Factor = Leaves.size ();
267
- assert (isPowerOf2_32 (Factor) && Factor <= 8 && Factor > 1 );
268
-
269
- if (Factor == 2 )
265
+ static void interleaveLeafValues (MutableArrayRef<Value *> SubLeaves) {
266
+ int NumLeaves = SubLeaves.size ();
267
+ if (NumLeaves == 2 )
270
268
return ;
271
269
272
- SmallVector<Value *, 8 > Buffer;
273
- if (Factor == 4 ) {
274
- for (unsigned SrcIdx : {0 , 2 , 1 , 3 })
275
- Buffer.push_back (Leaves[SrcIdx]);
276
- } else {
277
- // Factor of 8.
278
- //
279
- // A E C G B F D H
280
- // |_| |_| |_| |_|
281
- // |___| |___|
282
- // |_______|
283
- // |
284
- // A B C D E F G H
285
- for (unsigned SrcIdx : {0 , 4 , 2 , 6 , 1 , 5 , 3 , 7 })
286
- Buffer.push_back (Leaves[SrcIdx]);
287
- }
270
+ assert (isPowerOf2_32 (NumLeaves) && NumLeaves > 1 );
271
+
272
+ const int HalfLeaves = NumLeaves / 2 ;
273
+ // Visit the sub-trees.
274
+ interleaveLeafValues (SubLeaves.take_front (HalfLeaves));
275
+ interleaveLeafValues (SubLeaves.drop_front (HalfLeaves));
288
276
289
- llvm::copy (Buffer, Leaves.begin ());
277
+ SmallVector<Value *, 8 > Buffer;
278
+ // The step alternates between +HalfLeaves and 1-HalfLeaves. We exit the
279
+ // loop right before the last element: since SubLeaves always has an
280
+ // even number of elements, the last element never moves and would be
281
+ // the last one visited anyway. This simplifies
282
+ // the exit condition.
283
+ for (int i = 0 ; i < NumLeaves - 1 ;
284
+ (i < HalfLeaves) ? i += HalfLeaves : i += (1 - HalfLeaves))
285
+ Buffer.push_back (SubLeaves[i]);
286
+
287
+ llvm::copy (Buffer, SubLeaves.begin ());
290
288
}
291
289
292
290
static unsigned getVectorInterleaveFactor (IntrinsicInst *II,
@@ -353,7 +351,7 @@ static std::optional<Value *> getMask(Value *WideMask, unsigned Factor) {
353
351
return std::nullopt;
354
352
}
355
353
356
- static unsigned getVectorDeInterleaveFactor (IntrinsicInst *II,
354
+ static unsigned getVectorDeinterleaveFactor (IntrinsicInst *II,
357
355
SmallVectorImpl<Value *> &Results) {
358
356
using namespace PatternMatch ;
359
357
if (II->getIntrinsicID () != Intrinsic::vector_deinterleave2 ||
@@ -370,7 +368,7 @@ static unsigned getVectorDeInterleaveFactor(IntrinsicInst *II,
370
368
Queue.erase (Queue.begin ());
371
369
assert (Current->hasNUses (2 ));
372
370
373
- unsigned VisitedIdx = 0 ;
371
+ ExtractValueInst *LHS = nullptr , *RHS = nullptr ;
374
372
for (User *Usr : Current->users ()) {
375
373
// We're playing safe here and matching only the expression
376
374
// consisting of a perfectly balanced binary tree in which all
@@ -380,38 +378,26 @@ static unsigned getVectorDeInterleaveFactor(IntrinsicInst *II,
380
378
381
379
auto *EV = cast<ExtractValueInst>(Usr);
382
380
ArrayRef<unsigned > Indices = EV->getIndices ();
383
- if (Indices.size () != 1 || Indices[ 0 ] >= 2 )
381
+ if (Indices.size () != 1 )
384
382
return 0 ;
385
383
386
- // The idea is that we don't want to have two extractvalue
387
- // on the same index. So we XOR (1 << index) onto VisitedIdx
388
- // such that if there is any duplication, VisitedIdx will be
389
- // zero.
390
- VisitedIdx ^= (1 << Indices[0 ]);
391
- if (!VisitedIdx)
384
+ if (Indices[0 ] == 0 && !LHS)
385
+ LHS = EV;
386
+ else if (Indices[0 ] == 1 && !RHS)
387
+ RHS = EV;
388
+ else
392
389
return 0 ;
393
- // We have a legal index. At this point we're either going
394
- // to continue the traversal or push the leaf values into Results.
395
- // But in either cases we need to follow the order imposed by
396
- // ExtractValue's indices and swap with the last element pushed
397
- // into Queue/Results if necessary (This is also one of the main
398
- // reasons using BFS instead of DFS here, btw).
399
-
400
- // When VisitedIdx equals to 0b11, we're the last visted ExtractValue.
401
- // So if the current index is 0, we need to swap. Conversely, when
402
- // we're either the first visited ExtractValue or the last operand
403
- // in Queue/Results is of index 0, there is no need to swap.
404
- bool SwapWithLast = VisitedIdx == 0b11 && Indices[0 ] == 0 ;
390
+ }
405
391
392
+ // We have legal indices. At this point we're either going
393
+ // to continue the traversal or push the leaf values into Results.
394
+ for (ExtractValueInst *EV : {LHS, RHS}) {
406
395
// Continue the traversal.
407
396
if (match (EV->user_back (),
408
397
m_Intrinsic<Intrinsic::vector_deinterleave2>()) &&
409
398
EV->user_back ()->hasNUses (2 )) {
410
399
auto *EVUsr = cast<IntrinsicInst>(EV->user_back ());
411
- if (SwapWithLast && !Queue.empty ())
412
- Queue.insert (Queue.end () - 1 , EVUsr);
413
- else
414
- Queue.push_back (EVUsr);
400
+ Queue.push_back (EVUsr);
415
401
continue ;
416
402
}
417
403
@@ -421,10 +407,7 @@ static unsigned getVectorDeInterleaveFactor(IntrinsicInst *II,
421
407
return 0 ;
422
408
423
409
// Save the leaf value.
424
- if (SwapWithLast && !Results.empty ())
425
- Results.insert (Results.end () - 1 , EV);
426
- else
427
- Results.push_back (EV);
410
+ Results.push_back (EV);
428
411
429
412
++Factor;
430
413
}
@@ -673,7 +656,7 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
673
656
IntrinsicInst *DI, SmallSetVector<Instruction *, 32 > &DeadInsts) {
674
657
if (auto *VPLoad = dyn_cast<VPIntrinsic>(DI->getOperand (0 ))) {
675
658
SmallVector<Value *, 8 > DeInterleaveResults;
676
- unsigned Factor = getVectorDeInterleaveFactor (DI, DeInterleaveResults);
659
+ unsigned Factor = getVectorDeinterleaveFactor (DI, DeInterleaveResults);
677
660
if (!Factor)
678
661
return false ;
679
662
0 commit comments