@@ -635,11 +635,34 @@ getVectorDeinterleaveFactor(IntrinsicInst *II,
   return true;
 }
 
+/// Check the interleaved mask.
+///
+/// - If the value within the optional is non-null, it is the common
+///   deinterleaved mask.
+/// - If the value within the optional is nullptr, the mask is all-true.
+/// - Return std::nullopt if the mask cannot be deinterleaved.
+static std::optional<Value *> getMask(Value *WideMask, unsigned Factor) {
+  using namespace llvm::PatternMatch;
+  if (auto *IMI = dyn_cast<IntrinsicInst>(WideMask)) {
+    SmallVector<Value *, 8> Operands;
+    SmallVector<Instruction *, 8> DeadInsts;
+    if (getVectorInterleaveFactor(IMI, Operands, DeadInsts)) {
+      assert(!Operands.empty());
+      // The wide mask deinterleaves only if all Factor operands are the
+      // same value.
+      if (Operands.size() == Factor && llvm::all_equal(Operands))
+        return Operands.front();
+    }
+  }
+  if (match(WideMask, m_AllOnes()))
+    return nullptr;
+  return std::nullopt;
+}
+
 bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
     IntrinsicInst *DI, SmallSetVector<Instruction *, 32> &DeadInsts) {
-  LoadInst *LI = dyn_cast<LoadInst>(DI->getOperand(0));
-
-  if (!LI || !LI->hasOneUse() || !LI->isSimple())
+  Value *LoadedVal = DI->getOperand(0);
+  if (!LoadedVal->hasOneUse() || !isa<LoadInst, VPIntrinsic>(LoadedVal))
     return false;
 
   SmallVector<Value *, 8> DeinterleaveValues;
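Note that `getMask` above distinguishes three outcomes through `std::optional<Value *>`: a disengaged optional rejects the mask entirely, while an engaged optional may still hold `nullptr` to mean an all-true mask. A minimal standalone sketch of that convention, using a hypothetical `classifyMask` and a stub `Value` type in place of LLVM's:

```cpp
#include <cassert>
#include <optional>

struct Value {}; // stand-in for llvm::Value, just so the sketch compiles

// Mirrors getMask's contract: nullopt = cannot deinterleave,
// nullptr = all-true mask, non-null = the common deinterleaved mask.
static std::optional<Value *> classifyMask(Value *Common, bool AllOnes) {
  if (Common)
    return Common;       // interleave of Factor copies of one mask
  if (AllOnes)
    return nullptr;      // all-true wide mask
  return std::nullopt;   // anything else is unsupported
}

int main() {
  Value M;
  std::optional<Value *> R1 = classifyMask(&M, false);
  assert(R1 && *R1 == &M);      // engaged, non-null
  std::optional<Value *> R2 = classifyMask(nullptr, true);
  assert(R2 && *R2 == nullptr); // engaged, but holds nullptr
  std::optional<Value *> R3 = classifyMask(nullptr, false);
  assert(!R3);                  // disengaged: reject
  (void)R1; (void)R2; (void)R3;
}
```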
@@ -648,43 +671,92 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
                                 DeinterleaveDeadInsts))
     return false;
 
-  LLVM_DEBUG(dbgs() << "IA: Found a deinterleave intrinsic: " << *DI
-                    << " with factor = " << DeinterleaveValues.size() << "\n");
+  const unsigned Factor = DeinterleaveValues.size();
 
-  // Try and match this with target specific intrinsics.
-  if (!TLI->lowerDeinterleaveIntrinsicToLoad(DI, LI, DeinterleaveValues))
-    return false;
+  if (auto *VPLoad = dyn_cast<VPIntrinsic>(LoadedVal)) {
+    if (VPLoad->getIntrinsicID() != Intrinsic::vp_load)
+      return false;
+    // Check the mask operand; handle both all-true and interleaved masks.
+    Value *WideMask = VPLoad->getOperand(1);
+    std::optional<Value *> Mask = getMask(WideMask, Factor);
+    if (!Mask)
+      return false;
+
+    LLVM_DEBUG(dbgs() << "IA: Found a vp.load with deinterleave intrinsic "
+                      << *DI << " and factor = " << Factor << "\n");
+
+    // Since lowerInterleavedLoad expects Shuffles and a LoadInst, use a
+    // dedicated TLI hook to emit the target-specific interleaved instruction.
+    if (!TLI->lowerInterleavedScalableLoad(VPLoad, *Mask, DI,
+                                           DeinterleaveValues))
+      return false;
+
+  } else {
+    auto *LI = cast<LoadInst>(LoadedVal);
+    if (!LI->isSimple())
+      return false;
+
+    LLVM_DEBUG(dbgs() << "IA: Found a load with deinterleave intrinsic " << *DI
+                      << " and factor = " << Factor << "\n");
+
+    // Try and match this with target-specific intrinsics.
+    if (!TLI->lowerDeinterleaveIntrinsicToLoad(DI, LI, DeinterleaveValues))
+      return false;
+  }
 
   DeadInsts.insert(DeinterleaveDeadInsts.begin(), DeinterleaveDeadInsts.end());
   // We now have a target-specific load, so delete the old one.
-  DeadInsts.insert(LI);
+  DeadInsts.insert(cast<Instruction>(LoadedVal));
   return true;
 }
 
 bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
     IntrinsicInst *II, SmallSetVector<Instruction *, 32> &DeadInsts) {
   if (!II->hasOneUse())
     return false;
-
-  StoreInst *SI = dyn_cast<StoreInst>(*(II->users().begin()));
-
-  if (!SI || !SI->isSimple())
+  Value *StoredBy = II->user_back();
+  if (!isa<StoreInst, VPIntrinsic>(StoredBy))
     return false;
 
   SmallVector<Value *, 8> InterleaveValues;
   SmallVector<Instruction *, 8> InterleaveDeadInsts;
   if (!getVectorInterleaveFactor(II, InterleaveValues, InterleaveDeadInsts))
     return false;
 
-  LLVM_DEBUG(dbgs() << "IA: Found an interleave intrinsic: " << *II
-                    << " with factor = " << InterleaveValues.size() << "\n");
+  const unsigned Factor = InterleaveValues.size();
 
-  // Try and match this with target specific intrinsics.
-  if (!TLI->lowerInterleaveIntrinsicToStore(II, SI, InterleaveValues))
-    return false;
+  if (auto *VPStore = dyn_cast<VPIntrinsic>(StoredBy)) {
+    if (VPStore->getIntrinsicID() != Intrinsic::vp_store)
+      return false;
+
+    Value *WideMask = VPStore->getOperand(2);
+    std::optional<Value *> Mask = getMask(WideMask, Factor);
+    if (!Mask)
+      return false;
+
+    LLVM_DEBUG(dbgs() << "IA: Found a vp.store with interleave intrinsic "
+                      << *II << " and factor = " << Factor << "\n");
+
+    // Since lowerInterleavedStore expects Shuffles and a StoreInst, use a
+    // dedicated TLI hook to emit the target-specific interleaved instruction.
+    if (!TLI->lowerInterleavedScalableStore(VPStore, *Mask, II,
+                                            InterleaveValues))
+      return false;
+  } else {
+    auto *SI = cast<StoreInst>(StoredBy);
+    if (!SI->isSimple())
+      return false;
+
+    LLVM_DEBUG(dbgs() << "IA: Found a store with interleave intrinsic " << *II
+                      << " and factor = " << Factor << "\n");
+
+    // Try and match this with target-specific intrinsics.
+    if (!TLI->lowerInterleaveIntrinsicToStore(II, SI, InterleaveValues))
+      return false;
+  }
 
   // We now have a target-specific store, so delete the old one.
-  DeadInsts.insert(SI);
+  DeadInsts.insert(cast<Instruction>(StoredBy));
   DeadInsts.insert(InterleaveDeadInsts.begin(), InterleaveDeadInsts.end());
   return true;
 }
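For concreteness, this is roughly the IR shape the new vp.load path accepts: a `llvm.vp.load` whose mask is an interleave of one common mask, feeding `llvm.vector.deinterleave2`. The harness below is illustrative only and not part of the patch; it assumes an LLVM build is available, and the function name `@factor2_load` is made up:

```cpp
#include "llvm/AsmParser/Parser.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include <memory>

using namespace llvm;

static const char *IR = R"(
declare <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(
    <vscale x 2 x i1>, <vscale x 2 x i1>)
declare <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(
    ptr, <vscale x 4 x i1>, i32)
declare { <vscale x 2 x i32>, <vscale x 2 x i32> }
    @llvm.vector.deinterleave2.nxv4i32(<vscale x 4 x i32>)

define { <vscale x 2 x i32>, <vscale x 2 x i32> } @factor2_load(
    ptr %p, <vscale x 2 x i1> %m, i32 %evl) {
  ; Wide mask = interleave2(%m, %m), so getMask() can recover %m.
  %wide.mask = call <vscale x 4 x i1>
      @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1> %m,
                                      <vscale x 2 x i1> %m)
  ; The mask is operand 1 of vp.load, matching VPLoad->getOperand(1).
  %wide = call <vscale x 4 x i32>
      @llvm.vp.load.nxv4i32.p0(ptr %p, <vscale x 4 x i1> %wide.mask,
                               i32 %evl)
  %res = call { <vscale x 2 x i32>, <vscale x 2 x i32> }
      @llvm.vector.deinterleave2.nxv4i32(<vscale x 4 x i32> %wide)
  ret { <vscale x 2 x i32>, <vscale x 2 x i32> } %res
}
)";

int main() {
  LLVMContext Ctx;
  SMDiagnostic Err;
  std::unique_ptr<Module> M = parseAssemblyString(IR, Err, Ctx);
  if (!M) {
    Err.print("example", errs());
    return 1;
  }
  // The verifier checks that the intrinsic signatures actually line up.
  if (verifyModule(*M, &errs()))
    return 1;
  outs() << *M;
}
```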
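The vp.store path mirrors this. Swapping the string below into the harness above exercises the store shape; again illustrative only, with a made-up `@factor2_store`:

```cpp
// Store-side counterpart for the same harness: vector.interleave2 of the
// data feeding a vp.store whose mask is operand 2, matching
// VPStore->getOperand(2) in lowerInterleaveIntrinsic.
static const char *StoreIR = R"(
declare <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(
    <vscale x 2 x i1>, <vscale x 2 x i1>)
declare <vscale x 4 x i32> @llvm.vector.interleave2.nxv4i32(
    <vscale x 2 x i32>, <vscale x 2 x i32>)
declare void @llvm.vp.store.nxv4i32.p0(
    <vscale x 4 x i32>, ptr, <vscale x 4 x i1>, i32)

define void @factor2_store(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b,
                           ptr %p, <vscale x 2 x i1> %m, i32 %evl) {
  %data = call <vscale x 4 x i32>
      @llvm.vector.interleave2.nxv4i32(<vscale x 2 x i32> %a,
                                       <vscale x 2 x i32> %b)
  %wide.mask = call <vscale x 4 x i1>
      @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1> %m,
                                      <vscale x 2 x i1> %m)
  ; Operands of vp.store are (value, ptr, mask, evl): mask is operand 2.
  call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %data, ptr %p,
                                      <vscale x 4 x i1> %wide.mask, i32 %evl)
  ret void
}
)";
```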