@@ -143,11 +143,11 @@ struct SimpleValue {
143
143
!CI->getFunction ()->isPresplitCoroutine ();
144
144
}
145
145
return isa<CastInst>(Inst) || isa<UnaryOperator>(Inst) ||
146
- isa<BinaryOperator>(Inst) || isa<GetElementPtrInst >(Inst) ||
147
- isa<CmpInst >(Inst) || isa<SelectInst >(Inst) ||
148
- isa<ExtractElementInst >(Inst) || isa<InsertElementInst >(Inst) ||
149
- isa<ShuffleVectorInst >(Inst) || isa<ExtractValueInst >(Inst) ||
150
- isa<InsertValueInst>(Inst) || isa< FreezeInst>(Inst);
146
+ isa<BinaryOperator>(Inst) || isa<CmpInst >(Inst) ||
147
+ isa<SelectInst >(Inst) || isa<ExtractElementInst >(Inst) ||
148
+ isa<InsertElementInst >(Inst) || isa<ShuffleVectorInst >(Inst) ||
149
+ isa<ExtractValueInst >(Inst) || isa<InsertValueInst >(Inst) ||
150
+ isa<FreezeInst>(Inst);
151
151
}
152
152
};
153
153
@@ -307,10 +307,9 @@ static unsigned getHashValueImpl(SimpleValue Val) {
307
307
IVI->getOperand (1 ),
308
308
hash_combine_range (IVI->idx_begin (), IVI->idx_end ()));
309
309
310
- assert ((isa<CallInst>(Inst) || isa<GetElementPtrInst>(Inst) ||
311
- isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
312
- isa<ShuffleVectorInst>(Inst) || isa<UnaryOperator>(Inst) ||
313
- isa<FreezeInst>(Inst)) &&
310
+ assert ((isa<CallInst>(Inst) || isa<ExtractElementInst>(Inst) ||
311
+ isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
312
+ isa<UnaryOperator>(Inst) || isa<FreezeInst>(Inst)) &&
314
313
" Invalid/unknown instruction" );
315
314
316
315
// Handle intrinsics with commutative operands.
@@ -553,6 +552,77 @@ bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
553
552
return LHSI->isIdenticalTo (RHSI);
554
553
}
555
554
555
+ // ===----------------------------------------------------------------------===//
556
+ // GEPValue
557
+ // ===----------------------------------------------------------------------===//
558
+
559
+ namespace {
560
+
561
+ struct GEPValue {
562
+ Instruction *Inst;
563
+ APInt ConstantOffset;
564
+ bool HasConstantOffset;
565
+
566
+ GEPValue (Instruction *I) : Inst(I), HasConstantOffset(false ) {
567
+ assert ((isSentinel () || canHandle (I)) && " Inst can't be handled!" );
568
+ }
569
+ GEPValue (Instruction *I, APInt ConstantOffset, bool HasConstantOffset)
570
+ : Inst(I), ConstantOffset(ConstantOffset),
571
+ HasConstantOffset (HasConstantOffset) {
572
+ assert ((isSentinel () || canHandle (I)) && " Inst can't be handled!" );
573
+ }
574
+
575
+ bool isSentinel () const {
576
+ return Inst == DenseMapInfo<Instruction *>::getEmptyKey () ||
577
+ Inst == DenseMapInfo<Instruction *>::getTombstoneKey ();
578
+ }
579
+
580
+ static bool canHandle (Instruction *Inst) {
581
+ return isa<GetElementPtrInst>(Inst);
582
+ }
583
+ };
584
+
585
+ } // namespace
586
+
587
+ namespace llvm {
588
+
589
+ template <> struct DenseMapInfo <GEPValue> {
590
+ static inline GEPValue getEmptyKey () {
591
+ return DenseMapInfo<Instruction *>::getEmptyKey ();
592
+ }
593
+
594
+ static inline GEPValue getTombstoneKey () {
595
+ return DenseMapInfo<Instruction *>::getTombstoneKey ();
596
+ }
597
+
598
+ static unsigned getHashValue (GEPValue Val);
599
+ static bool isEqual (GEPValue LHS, GEPValue RHS);
600
+ };
601
+
602
+ } // end namespace llvm
603
+
604
+ unsigned DenseMapInfo<GEPValue>::getHashValue(GEPValue Val) {
605
+ GetElementPtrInst *GEP = cast<GetElementPtrInst>(Val.Inst );
606
+ if (Val.HasConstantOffset )
607
+ return hash_combine (GEP->getOpcode (), GEP->getPointerOperand (),
608
+ Val.ConstantOffset );
609
+ return hash_combine (
610
+ GEP->getOpcode (),
611
+ hash_combine_range (GEP->value_op_begin (), GEP->value_op_end ()));
612
+ }
613
+
614
+ bool DenseMapInfo<GEPValue>::isEqual(GEPValue LHS, GEPValue RHS) {
615
+ if (LHS.isSentinel () || RHS.isSentinel ())
616
+ return LHS.Inst == RHS.Inst ;
617
+ GetElementPtrInst *LGEP = cast<GetElementPtrInst>(LHS.Inst );
618
+ GetElementPtrInst *RGEP = cast<GetElementPtrInst>(RHS.Inst );
619
+ if (LGEP->getPointerOperand () != RGEP->getPointerOperand ())
620
+ return false ;
621
+ if (LHS.HasConstantOffset && RHS.HasConstantOffset )
622
+ return LHS.ConstantOffset == RHS.ConstantOffset ;
623
+ return LGEP->isIdenticalToWhenDefined (RGEP);
624
+ }
625
+
556
626
// ===----------------------------------------------------------------------===//
557
627
// EarlyCSE implementation
558
628
// ===----------------------------------------------------------------------===//
@@ -647,6 +717,13 @@ class EarlyCSE {
647
717
ScopedHashTable<CallValue, std::pair<Instruction *, unsigned >>;
648
718
CallHTType AvailableCalls;
649
719
720
+ using GEPMapAllocatorTy =
721
+ RecyclingAllocator<BumpPtrAllocator,
722
+ ScopedHashTableVal<GEPValue, Value *>>;
723
+ using GEPHTType = ScopedHashTable<GEPValue, Value *, DenseMapInfo<GEPValue>,
724
+ GEPMapAllocatorTy>;
725
+ GEPHTType AvailableGEPs;
726
+
650
727
// / This is the current generation of the memory value.
651
728
unsigned CurrentGeneration = 0 ;
652
729
@@ -667,9 +744,11 @@ class EarlyCSE {
667
744
class NodeScope {
668
745
public:
669
746
NodeScope (ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
670
- InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls)
671
- : Scope(AvailableValues), LoadScope(AvailableLoads),
672
- InvariantScope (AvailableInvariants), CallScope(AvailableCalls) {}
747
+ InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
748
+ GEPHTType &AvailableGEPs)
749
+ : Scope(AvailableValues), LoadScope(AvailableLoads),
750
+ InvariantScope (AvailableInvariants), CallScope(AvailableCalls),
751
+ GEPScope(AvailableGEPs) {}
673
752
NodeScope (const NodeScope &) = delete;
674
753
NodeScope &operator =(const NodeScope &) = delete ;
675
754
@@ -678,6 +757,7 @@ class EarlyCSE {
678
757
LoadHTType::ScopeTy LoadScope;
679
758
InvariantHTType::ScopeTy InvariantScope;
680
759
CallHTType::ScopeTy CallScope;
760
+ GEPHTType::ScopeTy GEPScope;
681
761
};
682
762
683
763
// Contains all the needed information to create a stack for doing a depth
@@ -688,13 +768,13 @@ class EarlyCSE {
688
768
public:
689
769
StackNode (ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
690
770
InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
691
- unsigned cg, DomTreeNode *n, DomTreeNode::const_iterator child,
771
+ GEPHTType &AvailableGEPs, unsigned cg, DomTreeNode *n,
772
+ DomTreeNode::const_iterator child,
692
773
DomTreeNode::const_iterator end)
693
774
: CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),
694
775
EndIter (end),
695
776
Scopes(AvailableValues, AvailableLoads, AvailableInvariants,
696
- AvailableCalls)
697
- {}
777
+ AvailableCalls, AvailableGEPs) {}
698
778
StackNode (const StackNode &) = delete;
699
779
StackNode &operator =(const StackNode &) = delete ;
700
780
@@ -1561,6 +1641,39 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
1561
1641
continue ;
1562
1642
}
1563
1643
1644
+ if (GEPValue::canHandle (&Inst)) {
1645
+ GetElementPtrInst *GEP = cast<GetElementPtrInst>(&Inst);
1646
+ APInt Offset (SQ.DL .getIndexTypeSizeInBits (GEP->getType ()), 0 );
1647
+ bool HasConstantOffset = GEP->accumulateConstantOffset (SQ.DL , Offset);
1648
+ GEPValue GEPVal (GEP, Offset, HasConstantOffset);
1649
+ if (Value *V = AvailableGEPs.lookup (GEPVal)) {
1650
+ LLVM_DEBUG (dbgs () << " EarlyCSE CSE: " << Inst << " to: " << *V
1651
+ << ' \n ' );
1652
+ if (auto *I = dyn_cast<Instruction>(V)) {
1653
+ // If I being poison triggers UB, there is no need to drop those
1654
+ // flags. Otherwise, only retain flags present on both I and Inst.
1655
+ // TODO: Currently some fast-math flags are not treated as
1656
+ // poison-generating even though they should. Until this is fixed,
1657
+ // always retain flags present on both I and Inst for floating point
1658
+ // instructions.
1659
+ if (isa<FPMathOperator>(I) ||
1660
+ (I->hasPoisonGeneratingFlags () && !programUndefinedIfPoison (I)))
1661
+ I->andIRFlags (&Inst);
1662
+ }
1663
+ Inst.replaceAllUsesWith (V);
1664
+ salvageKnowledge (&Inst, &AC);
1665
+ removeMSSA (Inst);
1666
+ Inst.eraseFromParent ();
1667
+ Changed = true ;
1668
+ ++NumCSE;
1669
+ continue ;
1670
+ }
1671
+
1672
+ // Otherwise, just remember that this value is available.
1673
+ AvailableGEPs.insert (GEPVal, &Inst);
1674
+ continue ;
1675
+ }
1676
+
1564
1677
// A release fence requires that all stores complete before it, but does
1565
1678
// not prevent the reordering of following loads 'before' the fence. As a
1566
1679
// result, we don't need to consider it as writing to memory and don't need
@@ -1675,7 +1788,7 @@ bool EarlyCSE::run() {
1675
1788
// Process the root node.
1676
1789
nodesToProcess.push_back (new StackNode (
1677
1790
AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
1678
- CurrentGeneration, DT.getRootNode (),
1791
+ AvailableGEPs, CurrentGeneration, DT.getRootNode (),
1679
1792
DT.getRootNode ()->begin (), DT.getRootNode ()->end ()));
1680
1793
1681
1794
assert (!CurrentGeneration && " Create a new EarlyCSE instance to rerun it." );
@@ -1698,10 +1811,10 @@ bool EarlyCSE::run() {
1698
1811
} else if (NodeToProcess->childIter () != NodeToProcess->end ()) {
1699
1812
// Push the next child onto the stack.
1700
1813
DomTreeNode *child = NodeToProcess->nextChild ();
1701
- nodesToProcess.push_back (
1702
- new StackNode ( AvailableValues, AvailableLoads, AvailableInvariants,
1703
- AvailableCalls , NodeToProcess->childGeneration (),
1704
- child, child->begin (), child->end ()));
1814
+ nodesToProcess.push_back (new StackNode (
1815
+ AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls ,
1816
+ AvailableGEPs , NodeToProcess->childGeneration (), child ,
1817
+ child->begin (), child->end ()));
1705
1818
} else {
1706
1819
// It has been processed, and there are no more children to process,
1707
1820
// so delete it and pop it off the stack.
0 commit comments