@@ -67,6 +67,7 @@ STATISTIC(NumCSE, "Number of instructions CSE'd");
6767STATISTIC (NumCSECVP, " Number of compare instructions CVP'd" );
6868STATISTIC (NumCSELoad, " Number of load instructions CSE'd" );
6969STATISTIC (NumCSECall, " Number of call instructions CSE'd" );
70+ STATISTIC (NumCSEGEP, " Number of GEP instructions CSE'd" );
7071STATISTIC (NumDSE, " Number of trivial dead stores removed" );
7172
7273DEBUG_COUNTER (CSECounter, " early-cse" ,
@@ -143,11 +144,11 @@ struct SimpleValue {
143144 !CI->getFunction ()->isPresplitCoroutine ();
144145 }
145146 return isa<CastInst>(Inst) || isa<UnaryOperator>(Inst) ||
146- isa<BinaryOperator>(Inst) || isa<GetElementPtrInst >(Inst) ||
147- isa<CmpInst >(Inst) || isa<SelectInst >(Inst) ||
148- isa<ExtractElementInst >(Inst) || isa<InsertElementInst >(Inst) ||
149- isa<ShuffleVectorInst >(Inst) || isa<ExtractValueInst >(Inst) ||
150- isa<InsertValueInst>(Inst) || isa< FreezeInst>(Inst);
147+ isa<BinaryOperator>(Inst) || isa<CmpInst >(Inst) ||
148+ isa<SelectInst >(Inst) || isa<ExtractElementInst >(Inst) ||
149+ isa<InsertElementInst >(Inst) || isa<ShuffleVectorInst >(Inst) ||
150+ isa<ExtractValueInst >(Inst) || isa<InsertValueInst >(Inst) ||
151+ isa<FreezeInst>(Inst);
151152 }
152153};
153154
@@ -307,10 +308,9 @@ static unsigned getHashValueImpl(SimpleValue Val) {
307308 IVI->getOperand (1 ),
308309 hash_combine_range (IVI->idx_begin (), IVI->idx_end ()));
309310
310- assert ((isa<CallInst>(Inst) || isa<GetElementPtrInst>(Inst) ||
311- isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
312- isa<ShuffleVectorInst>(Inst) || isa<UnaryOperator>(Inst) ||
313- isa<FreezeInst>(Inst)) &&
311+ assert ((isa<CallInst>(Inst) || isa<ExtractElementInst>(Inst) ||
312+ isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
313+ isa<UnaryOperator>(Inst) || isa<FreezeInst>(Inst)) &&
314314 " Invalid/unknown instruction" );
315315
316316 // Handle intrinsics with commutative operands.
@@ -548,11 +548,81 @@ bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
548548 // currently executing, so conservatively return false if they are in
549549 // different basic blocks.
550550 if (LHSI->isConvergent () && LHSI->getParent () != RHSI->getParent ())
551- return false ;
551+ return false ;
552552
553553 return LHSI->isIdenticalTo (RHSI);
554554}
555555
556+ // ===----------------------------------------------------------------------===//
557+ // GEPValue
558+ // ===----------------------------------------------------------------------===//
559+
560+ namespace {
561+
562+ struct GEPValue {
563+ Instruction *Inst;
564+ std::optional<int64_t > ConstantOffset;
565+
566+ GEPValue (Instruction *I) : Inst(I) {
567+ assert ((isSentinel () || canHandle (I)) && " Inst can't be handled!" );
568+ }
569+
570+ GEPValue (Instruction *I, std::optional<int64_t > ConstantOffset)
571+ : Inst(I), ConstantOffset(ConstantOffset) {
572+ assert ((isSentinel () || canHandle (I)) && " Inst can't be handled!" );
573+ }
574+
575+ bool isSentinel () const {
576+ return Inst == DenseMapInfo<Instruction *>::getEmptyKey () ||
577+ Inst == DenseMapInfo<Instruction *>::getTombstoneKey ();
578+ }
579+
580+ static bool canHandle (Instruction *Inst) {
581+ return isa<GetElementPtrInst>(Inst);
582+ }
583+ };
584+
585+ } // namespace
586+
587+ namespace llvm {
588+
589+ template <> struct DenseMapInfo <GEPValue> {
590+ static inline GEPValue getEmptyKey () {
591+ return DenseMapInfo<Instruction *>::getEmptyKey ();
592+ }
593+
594+ static inline GEPValue getTombstoneKey () {
595+ return DenseMapInfo<Instruction *>::getTombstoneKey ();
596+ }
597+
598+ static unsigned getHashValue (const GEPValue &Val);
599+ static bool isEqual (const GEPValue &LHS, const GEPValue &RHS);
600+ };
601+
602+ } // end namespace llvm
603+
604+ unsigned DenseMapInfo<GEPValue>::getHashValue(const GEPValue &Val) {
605+ auto *GEP = cast<GetElementPtrInst>(Val.Inst );
606+ if (Val.ConstantOffset .has_value ())
607+ return hash_combine (GEP->getOpcode (), GEP->getPointerOperand (),
608+ Val.ConstantOffset .value ());
609+ return hash_combine (
610+ GEP->getOpcode (),
611+ hash_combine_range (GEP->value_op_begin (), GEP->value_op_end ()));
612+ }
613+
614+ bool DenseMapInfo<GEPValue>::isEqual(const GEPValue &LHS, const GEPValue &RHS) {
615+ if (LHS.isSentinel () || RHS.isSentinel ())
616+ return LHS.Inst == RHS.Inst ;
617+ auto *LGEP = cast<GetElementPtrInst>(LHS.Inst );
618+ auto *RGEP = cast<GetElementPtrInst>(RHS.Inst );
619+ if (LGEP->getPointerOperand () != RGEP->getPointerOperand ())
620+ return false ;
621+ if (LHS.ConstantOffset .has_value () && RHS.ConstantOffset .has_value ())
622+ return LHS.ConstantOffset .value () == RHS.ConstantOffset .value ();
623+ return LGEP->isIdenticalToWhenDefined (RGEP);
624+ }
625+
556626// ===----------------------------------------------------------------------===//
557627// EarlyCSE implementation
558628// ===----------------------------------------------------------------------===//
@@ -647,6 +717,13 @@ class EarlyCSE {
647717 ScopedHashTable<CallValue, std::pair<Instruction *, unsigned >>;
648718 CallHTType AvailableCalls;
649719
720+ using GEPMapAllocatorTy =
721+ RecyclingAllocator<BumpPtrAllocator,
722+ ScopedHashTableVal<GEPValue, Value *>>;
723+ using GEPHTType = ScopedHashTable<GEPValue, Value *, DenseMapInfo<GEPValue>,
724+ GEPMapAllocatorTy>;
725+ GEPHTType AvailableGEPs;
726+
650727 // / This is the current generation of the memory value.
651728 unsigned CurrentGeneration = 0 ;
652729
@@ -667,9 +744,11 @@ class EarlyCSE {
667744 class NodeScope {
668745 public:
669746 NodeScope (ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
670- InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls)
671- : Scope(AvailableValues), LoadScope(AvailableLoads),
672- InvariantScope (AvailableInvariants), CallScope(AvailableCalls) {}
747+ InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
748+ GEPHTType &AvailableGEPs)
749+ : Scope(AvailableValues), LoadScope(AvailableLoads),
750+ InvariantScope (AvailableInvariants), CallScope(AvailableCalls),
751+ GEPScope(AvailableGEPs) {}
673752 NodeScope (const NodeScope &) = delete;
674753 NodeScope &operator =(const NodeScope &) = delete ;
675754
@@ -678,6 +757,7 @@ class EarlyCSE {
678757 LoadHTType::ScopeTy LoadScope;
679758 InvariantHTType::ScopeTy InvariantScope;
680759 CallHTType::ScopeTy CallScope;
760+ GEPHTType::ScopeTy GEPScope;
681761 };
682762
683763 // Contains all the needed information to create a stack for doing a depth
@@ -688,13 +768,13 @@ class EarlyCSE {
688768 public:
689769 StackNode (ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
690770 InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
691- unsigned cg, DomTreeNode *n, DomTreeNode::const_iterator child,
771+ GEPHTType &AvailableGEPs, unsigned cg, DomTreeNode *n,
772+ DomTreeNode::const_iterator child,
692773 DomTreeNode::const_iterator end)
693774 : CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),
694775 EndIter (end),
695776 Scopes(AvailableValues, AvailableLoads, AvailableInvariants,
696- AvailableCalls)
697- {}
777+ AvailableCalls, AvailableGEPs) {}
698778 StackNode (const StackNode &) = delete;
699779 StackNode &operator =(const StackNode &) = delete ;
700780
@@ -1214,6 +1294,20 @@ Value *EarlyCSE::getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
12141294 return Result;
12151295}
12161296
1297+ static void combineIRFlags (Instruction &From, Value *To) {
1298+ if (auto *I = dyn_cast<Instruction>(To)) {
1299+ // If I being poison triggers UB, there is no need to drop those
1300+ // flags. Otherwise, only retain flags present on both I and Inst.
1301+ // TODO: Currently some fast-math flags are not treated as
1302+ // poison-generating even though they should. Until this is fixed,
1303+ // always retain flags present on both I and Inst for floating point
1304+ // instructions.
1305+ if (isa<FPMathOperator>(I) ||
1306+ (I->hasPoisonGeneratingFlags () && !programUndefinedIfPoison (I)))
1307+ I->andIRFlags (&From);
1308+ }
1309+ }
1310+
12171311bool EarlyCSE::overridingStores (const ParseMemoryInst &Earlier,
12181312 const ParseMemoryInst &Later) {
12191313 // Can we remove Earlier store because of Later store?
@@ -1439,16 +1533,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
14391533 LLVM_DEBUG (dbgs () << " Skipping due to debug counter\n " );
14401534 continue ;
14411535 }
1442- if (auto *I = dyn_cast<Instruction>(V)) {
1443- // If I being poison triggers UB, there is no need to drop those
1444- // flags. Otherwise, only retain flags present on both I and Inst.
1445- // TODO: Currently some fast-math flags are not treated as
1446- // poison-generating even though they should. Until this is fixed,
1447- // always retain flags present on both I and Inst for floating point
1448- // instructions.
1449- if (isa<FPMathOperator>(I) || (I->hasPoisonGeneratingFlags () && !programUndefinedIfPoison (I)))
1450- I->andIRFlags (&Inst);
1451- }
1536+ combineIRFlags (Inst, V);
14521537 Inst.replaceAllUsesWith (V);
14531538 salvageKnowledge (&Inst, &AC);
14541539 removeMSSA (Inst);
@@ -1561,6 +1646,31 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
15611646 continue ;
15621647 }
15631648
1649+ // Compare GEP instructions based on offset.
1650+ if (GEPValue::canHandle (&Inst)) {
1651+ auto *GEP = cast<GetElementPtrInst>(&Inst);
1652+ APInt Offset = APInt (SQ.DL .getIndexTypeSizeInBits (GEP->getType ()), 0 );
1653+ GEPValue GEPVal (GEP, GEP->accumulateConstantOffset (SQ.DL , Offset)
1654+ ? Offset.trySExtValue ()
1655+ : std::nullopt );
1656+ if (Value *V = AvailableGEPs.lookup (GEPVal)) {
1657+ LLVM_DEBUG (dbgs () << " EarlyCSE CSE GEP: " << Inst << " to: " << *V
1658+ << ' \n ' );
1659+ combineIRFlags (Inst, V);
1660+ Inst.replaceAllUsesWith (V);
1661+ salvageKnowledge (&Inst, &AC);
1662+ removeMSSA (Inst);
1663+ Inst.eraseFromParent ();
1664+ Changed = true ;
1665+ ++NumCSEGEP;
1666+ continue ;
1667+ }
1668+
1669+ // Otherwise, just remember that we have this GEP.
1670+ AvailableGEPs.insert (GEPVal, &Inst);
1671+ continue ;
1672+ }
1673+
15641674 // A release fence requires that all stores complete before it, but does
15651675 // not prevent the reordering of following loads 'before' the fence. As a
15661676 // result, we don't need to consider it as writing to memory and don't need
@@ -1675,7 +1785,7 @@ bool EarlyCSE::run() {
16751785 // Process the root node.
16761786 nodesToProcess.push_back (new StackNode (
16771787 AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
1678- CurrentGeneration, DT.getRootNode (),
1788+ AvailableGEPs, CurrentGeneration, DT.getRootNode (),
16791789 DT.getRootNode ()->begin (), DT.getRootNode ()->end ()));
16801790
16811791 assert (!CurrentGeneration && " Create a new EarlyCSE instance to rerun it." );
@@ -1698,10 +1808,10 @@ bool EarlyCSE::run() {
16981808 } else if (NodeToProcess->childIter () != NodeToProcess->end ()) {
16991809 // Push the next child onto the stack.
17001810 DomTreeNode *child = NodeToProcess->nextChild ();
1701- nodesToProcess.push_back (
1702- new StackNode ( AvailableValues, AvailableLoads, AvailableInvariants,
1703- AvailableCalls , NodeToProcess->childGeneration (),
1704- child, child->begin (), child->end ()));
1811+ nodesToProcess.push_back (new StackNode (
1812+ AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls ,
1813+ AvailableGEPs , NodeToProcess->childGeneration (), child ,
1814+ child->begin (), child->end ()));
17051815 } else {
17061816 // It has been processed, and there are no more children to process,
17071817 // so delete it and pop it off the stack.
0 commit comments