Skip to content

Commit 156661a

Browse files
mshockwave and npanchen committed
[IA][RISCV] Support VP intrinsics in InterleavedAccessPass
Teach InterleavedAccessPass to recognize the following patterns: - vp.store an interleaved scalable vector - Deinterleaving a scalable vector loaded from vp.load Upon recognizing these patterns, IA will collect the interleaved / deinterleaved operands and delegate them over to their respective newly-added TLI hooks. For RISC-V, these patterns are lowered into segmented loads/stores (except when we're interleaving constant splats, in which case a unit-strde store will be generated) Right now we only recognized power-of-two (de)interleave cases, in which (de)interleave4/8 are synthesized from a tree of (de)interleave2. Co-authored-by: Nikolay Panchenko <[email protected]>
1 parent bc74a1e commit 156661a

File tree

5 files changed

+1005
-19
lines changed

5 files changed

+1005
-19
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ class TargetRegisterClass;
9494
class TargetRegisterInfo;
9595
class TargetTransformInfo;
9696
class Value;
97+
class VPIntrinsic;
9798

9899
namespace Sched {
99100

@@ -3152,6 +3153,34 @@ class TargetLoweringBase {
31523153
return false;
31533154
}
31543155

3156+
/// Lower an interleaved load to target specific intrinsics. Return
3157+
/// true on success.
3158+
///
3159+
/// \p Load is a vp.load instruction.
3160+
/// \p Mask is a mask value
3161+
/// \p DeinterleaveIntrin is vector.deinterleave intrinsic
3162+
/// \p DeinterleaveRes is a list of deinterleaved results.
3163+
virtual bool
3164+
lowerInterleavedScalableLoad(VPIntrinsic *Load, Value *Mask,
3165+
IntrinsicInst *DeinterleaveIntrin,
3166+
ArrayRef<Value *> DeinterleaveRes) const {
3167+
return false;
3168+
}
3169+
3170+
/// Lower an interleaved store to target specific intrinsics. Return
3171+
/// true on success.
3172+
///
3173+
/// \p Store is the vp.store instruction.
3174+
/// \p Mask is a mask value
3175+
/// \p InterleaveIntrin is vector.interleave intrinsic
3176+
/// \p InterleaveOps is a list of values being interleaved.
3177+
virtual bool
3178+
lowerInterleavedScalableStore(VPIntrinsic *Store, Value *Mask,
3179+
IntrinsicInst *InterleaveIntrin,
3180+
ArrayRef<Value *> InterleaveOps) const {
3181+
return false;
3182+
}
3183+
31553184
/// Lower a deinterleave intrinsic to a target specific load intrinsic.
31563185
/// Return true on success. Currently only supports
31573186
/// llvm.vector.deinterleave2

llvm/lib/CodeGen/InterleavedAccessPass.cpp

Lines changed: 91 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -630,11 +630,34 @@ getVectorDeinterleaveFactor(IntrinsicInst *II,
630630
return true;
631631
}
632632

633+
/// Check the interleaved mask
634+
///
635+
/// - if a value within the optional is non-nullptr, the value corresponds to
636+
/// deinterleaved mask
637+
/// - if a value within the optional is nullptr, the value corresponds to all-true
638+
/// mask
639+
/// - return nullopt if mask cannot be deinterleaved
640+
static std::optional<Value *> getMask(Value *WideMask, unsigned Factor) {
641+
using namespace llvm::PatternMatch;
642+
if (auto *IMI = dyn_cast<IntrinsicInst>(WideMask)) {
643+
SmallVector<Value *, 8> Operands;
644+
SmallVector<Instruction *, 8> DeadInsts;
645+
if (getVectorInterleaveFactor(IMI, Operands, DeadInsts)) {
646+
assert(!Operands.empty());
647+
if (Operands.size() == Factor &&
648+
std::equal(Operands.begin(), Operands.end(), Operands.begin()))
649+
return Operands.front();
650+
}
651+
}
652+
if (match(WideMask, m_AllOnes()))
653+
return nullptr;
654+
return std::nullopt;
655+
}
656+
633657
bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
634658
IntrinsicInst *DI, SmallSetVector<Instruction *, 32> &DeadInsts) {
635-
LoadInst *LI = dyn_cast<LoadInst>(DI->getOperand(0));
636-
637-
if (!LI || !LI->hasOneUse() || !LI->isSimple())
659+
Value *LoadedVal = DI->getOperand(0);
660+
if (!LoadedVal->hasOneUse() || !isa<LoadInst, VPIntrinsic>(LoadedVal))
638661
return false;
639662

640663
SmallVector<Value *, 8> DeinterleaveValues;
@@ -643,43 +666,92 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
643666
DeinterleaveDeadInsts))
644667
return false;
645668

646-
LLVM_DEBUG(dbgs() << "IA: Found a deinterleave intrinsic: " << *DI
647-
<< " with factor = " << DeinterleaveValues.size() << "\n");
669+
const unsigned Factor = DeinterleaveValues.size();
648670

649-
// Try and match this with target specific intrinsics.
650-
if (!TLI->lowerDeinterleaveIntrinsicToLoad(LI, DeinterleaveValues))
651-
return false;
671+
if (auto *VPLoad = dyn_cast<VPIntrinsic>(LoadedVal)) {
672+
if (VPLoad->getIntrinsicID() != Intrinsic::vp_load)
673+
return false;
674+
// Check mask operand. Handle both all-true and interleaved mask.
675+
Value *WideMask = VPLoad->getOperand(1);
676+
std::optional<Value *> Mask = getMask(WideMask, Factor);
677+
if (!Mask)
678+
return false;
679+
680+
LLVM_DEBUG(dbgs() << "IA: Found a vp.load with deinterleave intrinsic "
681+
<< *DI << " and factor = " << Factor << "\n");
682+
683+
// Since lowerInterleaveLoad expects Shuffles and LoadInst, use special
684+
// TLI function to emit target-specific interleaved instruction.
685+
if (!TLI->lowerInterleavedScalableLoad(VPLoad, *Mask, DI,
686+
DeinterleaveValues))
687+
return false;
688+
689+
} else {
690+
auto *LI = cast<LoadInst>(LoadedVal);
691+
if (!LI->isSimple())
692+
return false;
693+
694+
LLVM_DEBUG(dbgs() << "IA: Found a load with deinterleave intrinsic " << *DI
695+
<< " and factor = " << Factor << "\n");
696+
697+
// Try and match this with target specific intrinsics.
698+
if (!TLI->lowerDeinterleaveIntrinsicToLoad(LI, DeinterleaveValues))
699+
return false;
700+
}
652701

653702
DeadInsts.insert(DeinterleaveDeadInsts.begin(), DeinterleaveDeadInsts.end());
654703
// We now have a target-specific load, so delete the old one.
655-
DeadInsts.insert(LI);
704+
DeadInsts.insert(cast<Instruction>(LoadedVal));
656705
return true;
657706
}
658707

659708
bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
660709
IntrinsicInst *II, SmallSetVector<Instruction *, 32> &DeadInsts) {
661710
if (!II->hasOneUse())
662711
return false;
663-
664-
StoreInst *SI = dyn_cast<StoreInst>(*(II->users().begin()));
665-
666-
if (!SI || !SI->isSimple())
712+
Value *StoredBy = II->user_back();
713+
if (!isa<StoreInst, VPIntrinsic>(StoredBy))
667714
return false;
668715

669716
SmallVector<Value *, 8> InterleaveValues;
670717
SmallVector<Instruction *, 8> InterleaveDeadInsts;
671718
if (!getVectorInterleaveFactor(II, InterleaveValues, InterleaveDeadInsts))
672719
return false;
673720

674-
LLVM_DEBUG(dbgs() << "IA: Found an interleave intrinsic: " << *II
675-
<< " with factor = " << InterleaveValues.size() << "\n");
721+
const unsigned Factor = InterleaveValues.size();
676722

677-
// Try and match this with target specific intrinsics.
678-
if (!TLI->lowerInterleaveIntrinsicToStore(SI, InterleaveValues))
679-
return false;
723+
if (auto *VPStore = dyn_cast<VPIntrinsic>(StoredBy)) {
724+
if (VPStore->getIntrinsicID() != Intrinsic::vp_store)
725+
return false;
726+
727+
Value *WideMask = VPStore->getOperand(2);
728+
std::optional<Value *> Mask = getMask(WideMask, Factor);
729+
if (!Mask)
730+
return false;
731+
732+
LLVM_DEBUG(dbgs() << "IA: Found a vp.store with interleave intrinsic "
733+
<< *II << " and factor = " << Factor << "\n");
734+
735+
// Since lowerInterleavedStore expects Shuffle and StoreInst, use special
736+
// TLI function to emit target-specific interleaved instruction.
737+
if (!TLI->lowerInterleavedScalableStore(VPStore, *Mask, II,
738+
InterleaveValues))
739+
return false;
740+
} else {
741+
auto *SI = cast<StoreInst>(StoredBy);
742+
if (!SI->isSimple())
743+
return false;
744+
745+
LLVM_DEBUG(dbgs() << "IA: Found a store with interleave intrinsic " << *II
746+
<< " and factor = " << Factor << "\n");
747+
748+
// Try and match this with target specific intrinsics.
749+
if (!TLI->lowerInterleaveIntrinsicToStore(SI, InterleaveValues))
750+
return false;
751+
}
680752

681753
// We now have a target-specific store, so delete the old one.
682-
DeadInsts.insert(SI);
754+
DeadInsts.insert(cast<Instruction>(StoredBy));
683755
DeadInsts.insert(InterleaveDeadInsts.begin(), InterleaveDeadInsts.end());
684756
return true;
685757
}

0 commit comments

Comments
 (0)