Skip to content

Commit cfd84a0

Browse files
mshockwavenpanchen
authored andcommitted
[IA][RISCV] Support VP loads/stores in InterleavedAccessPass (llvm#120490)
Teach InterleavedAccessPass to recognize the following patterns: (1) a vp.store of an interleaved scalable vector; (2) deinterleaving of a scalable vector loaded by a vp.load. Upon recognizing these patterns, IA collects the interleaved / deinterleaved operands and delegates them to their respective newly added TLI hooks. For RISC-V, these patterns are lowered into segmented loads/stores. Right now we only recognize power-of-two (de)interleave cases, in which (de)interleave4/8 are synthesized from a tree of (de)interleave2. --------- Co-authored-by: Nikolay Panchenko <[email protected]>
1 parent 711df08 commit cfd84a0

File tree

5 files changed

+1171
-19
lines changed

5 files changed

+1171
-19
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ class TargetRegisterClass;
9494
class TargetRegisterInfo;
9595
class TargetTransformInfo;
9696
class Value;
97+
class VPIntrinsic;
9798

9899
namespace Sched {
99100

@@ -3156,6 +3157,30 @@ class TargetLoweringBase {
31563157
return false;
31573158
}
31583159

3160+
/// Lower an interleaved load to target specific intrinsics. Return
3161+
/// true on success.
3162+
///
3163+
/// \p Load is a vp.load instruction.
3164+
/// \p Mask is the deinterleaved (per-result) mask derived from the vp.load's wide mask.
3165+
/// \p DeinterleaveRes is a list of deinterleaved results.
3166+
virtual bool
3167+
lowerDeinterleavedIntrinsicToVPLoad(VPIntrinsic *Load, Value *Mask,
3168+
ArrayRef<Value *> DeinterleaveRes) const {
3169+
return false;
3170+
}
3171+
3172+
/// Lower an interleaved store to target specific intrinsics. Return
3173+
/// true on success.
3174+
///
3175+
/// \p Store is the vp.store instruction.
3176+
/// \p Mask is the deinterleaved (per-operand) mask derived from the vp.store's wide mask.
3177+
/// \p InterleaveOps is a list of values being interleaved.
3178+
virtual bool
3179+
lowerInterleavedIntrinsicToVPStore(VPIntrinsic *Store, Value *Mask,
3180+
ArrayRef<Value *> InterleaveOps) const {
3181+
return false;
3182+
}
3183+
31593184
/// Lower a deinterleave intrinsic to a target specific load intrinsic.
31603185
/// Return true on success. Currently only supports
31613186
/// llvm.vector.deinterleave2

llvm/lib/CodeGen/InterleavedAccessPass.cpp

Lines changed: 96 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -630,11 +630,37 @@ getVectorDeinterleaveFactor(IntrinsicInst *II,
630630
return true;
631631
}
632632

633+
// Return the corresponding deinterleaved mask, or nullptr if there is no valid
634+
// mask.
635+
static Value *getMask(Value *WideMask, unsigned Factor,
636+
VectorType *LeafValueTy) {
637+
using namespace llvm::PatternMatch;
638+
if (auto *IMI = dyn_cast<IntrinsicInst>(WideMask)) {
639+
SmallVector<Value *, 8> Operands;
640+
SmallVector<Instruction *, 8> DeadInsts;
641+
if (getVectorInterleaveFactor(IMI, Operands, DeadInsts)) {
642+
assert(!Operands.empty());
643+
if (Operands.size() == Factor && llvm::all_equal(Operands))
644+
return Operands[0];
645+
}
646+
}
647+
648+
if (match(WideMask, m_AllOnes())) {
649+
// Scale the vector length of all-ones mask.
650+
ElementCount OrigEC =
651+
cast<VectorType>(WideMask->getType())->getElementCount();
652+
assert(OrigEC.getKnownMinValue() % Factor == 0);
653+
return ConstantVector::getSplat(OrigEC.divideCoefficientBy(Factor),
654+
cast<Constant>(WideMask)->getSplatValue());
655+
}
656+
657+
return nullptr;
658+
}
659+
633660
bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
634661
IntrinsicInst *DI, SmallSetVector<Instruction *, 32> &DeadInsts) {
635-
LoadInst *LI = dyn_cast<LoadInst>(DI->getOperand(0));
636-
637-
if (!LI || !LI->hasOneUse() || !LI->isSimple())
662+
Value *LoadedVal = DI->getOperand(0);
663+
if (!LoadedVal->hasOneUse() || !isa<LoadInst, VPIntrinsic>(LoadedVal))
638664
return false;
639665

640666
SmallVector<Value *, 8> DeinterleaveValues;
@@ -643,43 +669,94 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
643669
DeinterleaveDeadInsts))
644670
return false;
645671

646-
LLVM_DEBUG(dbgs() << "IA: Found a deinterleave intrinsic: " << *DI
647-
<< " with factor = " << DeinterleaveValues.size() << "\n");
672+
const unsigned Factor = DeinterleaveValues.size();
648673

649-
// Try and match this with target specific intrinsics.
650-
if (!TLI->lowerDeinterleaveIntrinsicToLoad(LI, DeinterleaveValues))
651-
return false;
674+
if (auto *VPLoad = dyn_cast<VPIntrinsic>(LoadedVal)) {
675+
if (VPLoad->getIntrinsicID() != Intrinsic::vp_load)
676+
return false;
677+
// Check mask operand. Handle both all-true and interleaved mask.
678+
Value *WideMask = VPLoad->getOperand(1);
679+
Value *Mask = getMask(WideMask, Factor,
680+
cast<VectorType>(DeinterleaveValues[0]->getType()));
681+
if (!Mask)
682+
return false;
683+
684+
LLVM_DEBUG(dbgs() << "IA: Found a vp.load with deinterleave intrinsic "
685+
<< *DI << " and factor = " << Factor << "\n");
686+
687+
// Since lowerInterleavedLoad expects Shuffles and LoadInst, use special
688+
// TLI function to emit target-specific interleaved instruction.
689+
if (!TLI->lowerDeinterleavedIntrinsicToVPLoad(VPLoad, Mask,
690+
DeinterleaveValues))
691+
return false;
692+
693+
} else {
694+
auto *LI = cast<LoadInst>(LoadedVal);
695+
if (!LI->isSimple())
696+
return false;
697+
698+
LLVM_DEBUG(dbgs() << "IA: Found a load with deinterleave intrinsic " << *DI
699+
<< " and factor = " << Factor << "\n");
700+
701+
// Try and match this with target specific intrinsics.
702+
if (!TLI->lowerDeinterleaveIntrinsicToLoad(LI, DeinterleaveValues))
703+
return false;
704+
}
652705

653706
DeadInsts.insert(DeinterleaveDeadInsts.begin(), DeinterleaveDeadInsts.end());
654707
// We now have a target-specific load, so delete the old one.
655-
DeadInsts.insert(LI);
708+
DeadInsts.insert(cast<Instruction>(LoadedVal));
656709
return true;
657710
}
658711

659712
bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
660713
IntrinsicInst *II, SmallSetVector<Instruction *, 32> &DeadInsts) {
661714
if (!II->hasOneUse())
662715
return false;
663-
664-
StoreInst *SI = dyn_cast<StoreInst>(*(II->users().begin()));
665-
666-
if (!SI || !SI->isSimple())
716+
Value *StoredBy = II->user_back();
717+
if (!isa<StoreInst, VPIntrinsic>(StoredBy))
667718
return false;
668719

669720
SmallVector<Value *, 8> InterleaveValues;
670721
SmallVector<Instruction *, 8> InterleaveDeadInsts;
671722
if (!getVectorInterleaveFactor(II, InterleaveValues, InterleaveDeadInsts))
672723
return false;
673724

674-
LLVM_DEBUG(dbgs() << "IA: Found an interleave intrinsic: " << *II
675-
<< " with factor = " << InterleaveValues.size() << "\n");
725+
const unsigned Factor = InterleaveValues.size();
676726

677-
// Try and match this with target specific intrinsics.
678-
if (!TLI->lowerInterleaveIntrinsicToStore(SI, InterleaveValues))
679-
return false;
727+
if (auto *VPStore = dyn_cast<VPIntrinsic>(StoredBy)) {
728+
if (VPStore->getIntrinsicID() != Intrinsic::vp_store)
729+
return false;
730+
731+
Value *WideMask = VPStore->getOperand(2);
732+
Value *Mask = getMask(WideMask, Factor,
733+
cast<VectorType>(InterleaveValues[0]->getType()));
734+
if (!Mask)
735+
return false;
736+
737+
LLVM_DEBUG(dbgs() << "IA: Found a vp.store with interleave intrinsic "
738+
<< *II << " and factor = " << Factor << "\n");
739+
740+
// Since lowerInterleavedStore expects Shuffle and StoreInst, use special
741+
// TLI function to emit target-specific interleaved instruction.
742+
if (!TLI->lowerInterleavedIntrinsicToVPStore(VPStore, Mask,
743+
InterleaveValues))
744+
return false;
745+
} else {
746+
auto *SI = cast<StoreInst>(StoredBy);
747+
if (!SI->isSimple())
748+
return false;
749+
750+
LLVM_DEBUG(dbgs() << "IA: Found a store with interleave intrinsic " << *II
751+
<< " and factor = " << Factor << "\n");
752+
753+
// Try and match this with target specific intrinsics.
754+
if (!TLI->lowerInterleaveIntrinsicToStore(SI, InterleaveValues))
755+
return false;
756+
}
680757

681758
// We now have a target-specific store, so delete the old one.
682-
DeadInsts.insert(SI);
759+
DeadInsts.insert(cast<Instruction>(StoredBy));
683760
DeadInsts.insert(InterleaveDeadInsts.begin(), InterleaveDeadInsts.end());
684761
return true;
685762
}

0 commit comments

Comments
 (0)