Skip to content

Commit 42f7ba7

Browse files
mshockwave and npanchen
committed
[IA][RISCV] Support VP intrinsics in InterleavedAccessPass
Teach InterleavedAccessPass to recognize the following patterns: - vp.store an interleaved scalable vector - Deinterleaving a scalable vector loaded from vp.load Upon recognizing these patterns, IA will collect the interleaved / deinterleaved operands and delegate them over to their respective newly-added TLI hooks. For RISC-V, these patterns are lowered into segmented loads/stores (except when we're interleaving constant splats, in which case a unit-stride store will be generated). Right now we only recognize power-of-two (de)interleave cases, in which (de)interleave4/8 are synthesized from a tree of (de)interleave2. Co-authored-by: Nikolay Panchenko <[email protected]>
1 parent 958b507 commit 42f7ba7

File tree

5 files changed

+1005
-19
lines changed

5 files changed

+1005
-19
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ class TargetRegisterClass;
9494
class TargetRegisterInfo;
9595
class TargetTransformInfo;
9696
class Value;
97+
class VPIntrinsic;
9798

9899
namespace Sched {
99100

@@ -3152,6 +3153,34 @@ class TargetLoweringBase {
31523153
return false;
31533154
}
31543155

3156+
/// Lower an interleaved load to target specific intrinsics. Return
/// true on success.
///
/// The default implementation refuses the lowering; targets override this to
/// emit their own segmented-load instructions.
///
/// \p Load is a vp.load instruction.
/// \p Mask is a mask value applied per deinterleaved component; a nullptr
///    mask denotes an all-true mask (see getMask in InterleavedAccessPass).
/// \p DeinterleaveIntrin is vector.deinterleave intrinsic
/// \p DeinterleaveRes is a list of deinterleaved results, one per component;
///    its size is the interleave factor.
virtual bool
lowerInterleavedScalableLoad(VPIntrinsic *Load, Value *Mask,
                             IntrinsicInst *DeinterleaveIntrin,
                             ArrayRef<Value *> DeinterleaveRes) const {
  return false;
}
3169+
3170+
/// Lower an interleaved store to target specific intrinsics. Return
/// true on success.
///
/// The default implementation refuses the lowering; targets override this to
/// emit their own segmented-store instructions.
///
/// \p Store is the vp.store instruction.
/// \p Mask is a mask value applied per interleaved component; a nullptr mask
///    denotes an all-true mask (see getMask in InterleavedAccessPass).
/// \p InterleaveIntrin is vector.interleave intrinsic
/// \p InterleaveOps is a list of values being interleaved, one per component;
///    its size is the interleave factor.
virtual bool
lowerInterleavedScalableStore(VPIntrinsic *Store, Value *Mask,
                              IntrinsicInst *InterleaveIntrin,
                              ArrayRef<Value *> InterleaveOps) const {
  return false;
}
3183+
31553184
/// Lower a deinterleave intrinsic to a target specific load intrinsic.
31563185
/// Return true on success. Currently only supports
31573186
/// llvm.vector.deinterleave2

llvm/lib/CodeGen/InterleavedAccessPass.cpp

Lines changed: 91 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -635,11 +635,34 @@ getVectorDeinterleaveFactor(IntrinsicInst *II,
635635
return true;
636636
}
637637

638+
/// Check the interleaved mask
639+
///
640+
/// - if a value within the optional is non-nullptr, the value corresponds to
641+
/// deinterleaved mask
642+
/// - if a value within the option is nullptr, the value corresponds to all-true
643+
/// mask
644+
/// - return nullopt if mask cannot be deinterleaved
645+
static std::optional<Value *> getMask(Value *WideMask, unsigned Factor) {
646+
using namespace llvm::PatternMatch;
647+
if (auto *IMI = dyn_cast<IntrinsicInst>(WideMask)) {
648+
SmallVector<Value *, 8> Operands;
649+
SmallVector<Instruction *, 8> DeadInsts;
650+
if (getVectorInterleaveFactor(IMI, Operands, DeadInsts)) {
651+
assert(!Operands.empty());
652+
if (Operands.size() == Factor &&
653+
std::equal(Operands.begin(), Operands.end(), Operands.begin()))
654+
return Operands.front();
655+
}
656+
}
657+
if (match(WideMask, m_AllOnes()))
658+
return nullptr;
659+
return std::nullopt;
660+
}
661+
638662
// Match a (possibly power-of-two tree of) vector.deinterleave intrinsic fed by
// a plain load or a vp.load, and delegate to the target to replace it with a
// segmented-load instruction. Returns true and records the replaced
// instructions in \p DeadInsts on success.
bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
    IntrinsicInst *DI, SmallSetVector<Instruction *, 32> &DeadInsts) {
  Value *LoadedVal = DI->getOperand(0);
  // The loaded value must feed only this deinterleave, and must come from a
  // load we know how to replace (LoadInst or a VP intrinsic).
  if (!LoadedVal->hasOneUse() || !isa<LoadInst, VPIntrinsic>(LoadedVal))
    return false;

  // Walk the deinterleave tree: DeinterleaveValues receives the leaf results
  // (one per component), DeinterleaveDeadInsts the intermediate nodes.
  // NOTE(review): the two lines declaring DeinterleaveDeadInsts and opening
  // the getVectorDeinterleaveFactor call fall in a gap between diff hunks and
  // are reconstructed here — confirm against the full file.
  SmallVector<Value *, 8> DeinterleaveValues;
  SmallVector<Instruction *, 8> DeinterleaveDeadInsts;
  if (!getVectorDeinterleaveFactor(DI, DeinterleaveValues,
                                   DeinterleaveDeadInsts))
    return false;

  const unsigned Factor = DeinterleaveValues.size();

  if (auto *VPLoad = dyn_cast<VPIntrinsic>(LoadedVal)) {
    // Only vp.load is supported among VP intrinsics.
    if (VPLoad->getIntrinsicID() != Intrinsic::vp_load)
      return false;
    // Check mask operand. Handle both all-true and interleaved mask.
    // getMask yields nullptr for an all-true mask, nullopt on failure.
    Value *WideMask = VPLoad->getOperand(1);
    std::optional<Value *> Mask = getMask(WideMask, Factor);
    if (!Mask)
      return false;

    LLVM_DEBUG(dbgs() << "IA: Found a vp.load with deinterleave intrinsic "
                      << *DI << " and factor = " << Factor << "\n");

    // Since lowerInterleaveLoad expects Shuffles and LoadInst, use special
    // TLI function to emit target-specific interleaved instruction.
    if (!TLI->lowerInterleavedScalableLoad(VPLoad, *Mask, DI,
                                           DeinterleaveValues))
      return false;

  } else {
    // Plain load path: only simple (non-atomic, non-volatile) loads qualify.
    auto *LI = cast<LoadInst>(LoadedVal);
    if (!LI->isSimple())
      return false;

    LLVM_DEBUG(dbgs() << "IA: Found a load with deinterleave intrinsic " << *DI
                      << " and factor = " << Factor << "\n");

    // Try and match this with target specific intrinsics.
    if (!TLI->lowerDeinterleaveIntrinsicToLoad(DI, LI, DeinterleaveValues))
      return false;
  }

  // The target emitted its own load; retire the deinterleave tree and the
  // original load.
  DeadInsts.insert(DeinterleaveDeadInsts.begin(), DeinterleaveDeadInsts.end());
  // We now have a target-specific load, so delete the old one.
  DeadInsts.insert(cast<Instruction>(LoadedVal));
  return true;
}
663712

664713
// Match a (possibly power-of-two tree of) vector.interleave intrinsic whose
// sole user is a plain store or a vp.store, and delegate to the target to
// replace it with a segmented-store instruction. Returns true and records the
// replaced instructions in \p DeadInsts on success.
bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
    IntrinsicInst *II, SmallSetVector<Instruction *, 32> &DeadInsts) {
  // The interleaved value must feed exactly one consumer...
  if (!II->hasOneUse())
    return false;
  // ...and that consumer must be a store we know how to replace.
  Value *StoredBy = II->user_back();
  if (!isa<StoreInst, VPIntrinsic>(StoredBy))
    return false;

  // Walk the interleave tree: InterleaveValues receives the leaf operands
  // (one per component), InterleaveDeadInsts the intermediate nodes.
  SmallVector<Value *, 8> InterleaveValues;
  SmallVector<Instruction *, 8> InterleaveDeadInsts;
  if (!getVectorInterleaveFactor(II, InterleaveValues, InterleaveDeadInsts))
    return false;

  const unsigned Factor = InterleaveValues.size();

  if (auto *VPStore = dyn_cast<VPIntrinsic>(StoredBy)) {
    // Only vp.store is supported among VP intrinsics.
    if (VPStore->getIntrinsicID() != Intrinsic::vp_store)
      return false;

    // Check mask operand (operand 2 of vp.store). getMask yields nullptr for
    // an all-true mask, nullopt on failure.
    Value *WideMask = VPStore->getOperand(2);
    std::optional<Value *> Mask = getMask(WideMask, Factor);
    if (!Mask)
      return false;

    LLVM_DEBUG(dbgs() << "IA: Found a vp.store with interleave intrinsic "
                      << *II << " and factor = " << Factor << "\n");

    // Since lowerInterleavedStore expects Shuffle and StoreInst, use special
    // TLI function to emit target-specific interleaved instruction.
    if (!TLI->lowerInterleavedScalableStore(VPStore, *Mask, II,
                                            InterleaveValues))
      return false;
  } else {
    // Plain store path: only simple (non-atomic, non-volatile) stores qualify.
    auto *SI = cast<StoreInst>(StoredBy);
    if (!SI->isSimple())
      return false;

    LLVM_DEBUG(dbgs() << "IA: Found a store with interleave intrinsic " << *II
                      << " and factor = " << Factor << "\n");

    // Try and match this with target specific intrinsics.
    if (!TLI->lowerInterleaveIntrinsicToStore(II, SI, InterleaveValues))
      return false;
  }

  // We now have a target-specific store, so delete the old one.
  DeadInsts.insert(cast<Instruction>(StoredBy));
  DeadInsts.insert(InterleaveDeadInsts.begin(), InterleaveDeadInsts.end());
  return true;
}

0 commit comments

Comments
 (0)