@@ -635,11 +635,34 @@ getVectorDeinterleaveFactor(IntrinsicInst *II,
635635 return true ;
636636}
637637
638+ // / Check the interleaved mask
639+ // /
640+ // / - if a value within the optional is non-nullptr, the value corresponds to
641+ // / deinterleaved mask
642+ // / - if a value within the option is nullptr, the value corresponds to all-true
643+ // / mask
644+ // / - return nullopt if mask cannot be deinterleaved
645+ static std::optional<Value *> getMask (Value *WideMask, unsigned Factor) {
646+ using namespace llvm ::PatternMatch;
647+ if (auto *IMI = dyn_cast<IntrinsicInst>(WideMask)) {
648+ SmallVector<Value *, 8 > Operands;
649+ SmallVector<Instruction *, 8 > DeadInsts;
650+ if (getVectorInterleaveFactor (IMI, Operands, DeadInsts)) {
651+ assert (!Operands.empty ());
652+ if (Operands.size () == Factor &&
653+ std::equal (Operands.begin (), Operands.end (), Operands.begin ()))
654+ return Operands.front ();
655+ }
656+ }
657+ if (match (WideMask, m_AllOnes ()))
658+ return nullptr ;
659+ return std::nullopt ;
660+ }
661+
// Lower a deinterleave intrinsic DI whose operand 0 is a single-use load or
// vp.load by handing it to a target (TLI) hook. Returns true when the target
// hook succeeded; in that case the original load and the instructions gathered
// in DeinterleaveDeadInsts are queued in DeadInsts. Returns false (queuing
// nothing) when any precondition below fails or the target declines.
//
// NOTE: this listing is a diff: lines marked '-' are the previous
// implementation, lines marked '+' are its replacement, and the '@@' line
// marks a hunk break where some lines of the body are not shown.
638662bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic (
639663 IntrinsicInst *DI, SmallSetVector<Instruction *, 32 > &DeadInsts) {
640- LoadInst *LI = dyn_cast<LoadInst>(DI->getOperand (0 ));
641-
642- if (!LI || !LI->hasOneUse () || !LI->isSimple ())
// The loaded value must feed only this deinterleave and must be produced by
// either a plain load or a VP intrinsic (narrowed to vp.load further down).
664+ Value *LoadedVal = DI->getOperand (0 );
665+ if (!LoadedVal->hasOneUse () || !isa<LoadInst, VPIntrinsic>(LoadedVal))
643666 return false ;
644667
645668 SmallVector<Value *, 8 > DeinterleaveValues;
@@ -648,43 +671,92 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
648671 DeinterleaveDeadInsts))
649672 return false ;
650673
651- LLVM_DEBUG (dbgs () << " IA: Found a deinterleave intrinsic: " << *DI
652- << " with factor = " << DeinterleaveValues.size () << " \n " );
// The factor is the number of values collected in DeinterleaveValues.
674+ const unsigned Factor = DeinterleaveValues.size ();
653675
654- // Try and match this with target specific intrinsics.
655- if (!TLI->lowerDeinterleaveIntrinsicToLoad (DI, LI, DeinterleaveValues))
656- return false ;
// VP path: vp.load is the only VP intrinsic accepted here.
676+ if (auto *VPLoad = dyn_cast<VPIntrinsic>(LoadedVal)) {
677+ if (VPLoad->getIntrinsicID () != Intrinsic::vp_load)
678+ return false ;
679+ // Check mask operand. Handle both all-true and interleaved mask.
680+ Value *WideMask = VPLoad->getOperand (1 );
// An empty optional means the mask could not be deinterleaved; a contained
// nullptr stands for an all-true mask and is forwarded to the target as-is.
681+ std::optional<Value *> Mask = getMask (WideMask, Factor);
682+ if (!Mask)
683+ return false ;
684+
685+ LLVM_DEBUG (dbgs () << " IA: Found a vp.load with deinterleave intrinsic "
686+ << *DI << " and factor = " << Factor << " \n " );
687+
688+ // Since lowerInterleaveLoad expects Shuffles and LoadInst, use special
689+ // TLI function to emit target-specific interleaved instruction.
690+ if (!TLI->lowerInterleavedScalableLoad (VPLoad, *Mask, DI,
691+ DeinterleaveValues))
692+ return false ;
693+
// Plain-load path: only simple loads (per LoadInst::isSimple) are lowered.
694+ } else {
695+ auto *LI = cast<LoadInst>(LoadedVal);
696+ if (!LI->isSimple ())
697+ return false ;
698+
699+ LLVM_DEBUG (dbgs () << " IA: Found a load with deinterleave intrinsic " << *DI
700+ << " and factor = " << Factor << " \n " );
701+
702+ // Try and match this with target specific intrinsics.
703+ if (!TLI->lowerDeinterleaveIntrinsicToLoad (DI, LI, DeinterleaveValues))
704+ return false ;
705+ }
657706
658707 DeadInsts.insert (DeinterleaveDeadInsts.begin (), DeinterleaveDeadInsts.end ());
659708 // We now have a target-specific load, so delete the old one.
660- DeadInsts.insert (LI );
// The cast cannot fail: LoadedVal was checked above to be a LoadInst or a
// VPIntrinsic.
709+ DeadInsts.insert (cast<Instruction>(LoadedVal) );
661710 return true ;
662711}
663712
// Lower an interleave intrinsic II whose single user is a store or vp.store
// by handing it to a target (TLI) hook. Returns true when the target hook
// succeeded; in that case the original store and the instructions gathered in
// InterleaveDeadInsts are queued in DeadInsts. Returns false (queuing nothing)
// when any precondition below fails or the target declines.
//
// NOTE: this listing is a diff: lines marked '-' are the previous
// implementation and lines marked '+' are its replacement.
664713bool InterleavedAccessImpl::lowerInterleaveIntrinsic (
665714 IntrinsicInst *II, SmallSetVector<Instruction *, 32 > &DeadInsts) {
666715 if (!II->hasOneUse ())
667716 return false ;
668-
669- StoreInst *SI = dyn_cast<StoreInst>(*(II->users ().begin ()));
670-
671- if (!SI || !SI->isSimple ())
// With exactly one use, user_back() is that sole user. It must be a plain
// store or a VP intrinsic (narrowed to vp.store further down).
717+ Value *StoredBy = II->user_back ();
718+ if (!isa<StoreInst, VPIntrinsic>(StoredBy))
672719 return false ;
673720
674721 SmallVector<Value *, 8 > InterleaveValues;
675722 SmallVector<Instruction *, 8 > InterleaveDeadInsts;
676723 if (!getVectorInterleaveFactor (II, InterleaveValues, InterleaveDeadInsts))
677724 return false ;
678725
679- LLVM_DEBUG (dbgs () << " IA: Found an interleave intrinsic: " << *II
680- << " with factor = " << InterleaveValues.size () << " \n " );
// The factor is the number of values collected in InterleaveValues.
726+ const unsigned Factor = InterleaveValues.size ();
681727
682- // Try and match this with target specific intrinsics.
683- if (!TLI->lowerInterleaveIntrinsicToStore (II, SI, InterleaveValues))
684- return false ;
// VP path: vp.store is the only VP intrinsic accepted here.
// NOTE(review): nothing visible here checks that II is the stored-value
// operand of the vp.store rather than, e.g., its mask operand -- confirm
// whether a guard on operand 0 is needed.
728+ if (auto *VPStore = dyn_cast<VPIntrinsic>(StoredBy)) {
729+ if (VPStore->getIntrinsicID () != Intrinsic::vp_store)
730+ return false ;
731+
// Operand 2 is taken as the store's mask. An empty optional from getMask
// means it could not be deinterleaved; a contained nullptr stands for an
// all-true mask and is forwarded to the target as-is.
732+ Value *WideMask = VPStore->getOperand (2 );
733+ std::optional<Value *> Mask = getMask (WideMask, Factor);
734+ if (!Mask)
735+ return false ;
736+
737+ LLVM_DEBUG (dbgs () << " IA: Found a vp.store with interleave intrinsic "
738+ << *II << " and factor = " << Factor << " \n " );
739+
740+ // Since lowerInterleavedStore expects Shuffle and StoreInst, use special
741+ // TLI function to emit target-specific interleaved instruction.
742+ if (!TLI->lowerInterleavedScalableStore (VPStore, *Mask, II,
743+ InterleaveValues))
744+ return false ;
// Plain-store path: only simple stores (per StoreInst::isSimple) are lowered.
745+ } else {
746+ auto *SI = cast<StoreInst>(StoredBy);
747+ if (!SI->isSimple ())
748+ return false ;
749+
750+ LLVM_DEBUG (dbgs () << " IA: Found a store with interleave intrinsic " << *II
751+ << " and factor = " << Factor << " \n " );
752+
753+ // Try and match this with target specific intrinsics.
754+ if (!TLI->lowerInterleaveIntrinsicToStore (II, SI, InterleaveValues))
755+ return false ;
756+ }
685757
686758 // We now have a target-specific store, so delete the old one.
687- DeadInsts.insert (SI );
// The cast cannot fail: StoredBy was checked above to be a StoreInst or a
// VPIntrinsic.
759+ DeadInsts.insert (cast<Instruction>(StoredBy) );
688760 DeadInsts.insert (InterleaveDeadInsts.begin (), InterleaveDeadInsts.end ());
689761 return true ;
690762}
0 commit comments