@@ -67,6 +67,7 @@ STATISTIC(NumCSE, "Number of instructions CSE'd");
67
67
STATISTIC (NumCSECVP, " Number of compare instructions CVP'd" );
68
68
STATISTIC (NumCSELoad, " Number of load instructions CSE'd" );
69
69
STATISTIC (NumCSECall, " Number of call instructions CSE'd" );
70
+ STATISTIC (NumCSEGEP, " Number of GEP instructions CSE'd" );
70
71
STATISTIC (NumDSE, " Number of trivial dead stores removed" );
71
72
72
73
DEBUG_COUNTER (CSECounter, " early-cse" ,
@@ -143,11 +144,11 @@ struct SimpleValue {
143
144
!CI->getFunction ()->isPresplitCoroutine ();
144
145
}
145
146
return isa<CastInst>(Inst) || isa<UnaryOperator>(Inst) ||
146
- isa<BinaryOperator>(Inst) || isa<GetElementPtrInst >(Inst) ||
147
- isa<CmpInst >(Inst) || isa<SelectInst >(Inst) ||
148
- isa<ExtractElementInst >(Inst) || isa<InsertElementInst >(Inst) ||
149
- isa<ShuffleVectorInst >(Inst) || isa<ExtractValueInst >(Inst) ||
150
- isa<InsertValueInst>(Inst) || isa< FreezeInst>(Inst);
147
+ isa<BinaryOperator>(Inst) || isa<CmpInst >(Inst) ||
148
+ isa<SelectInst >(Inst) || isa<ExtractElementInst >(Inst) ||
149
+ isa<InsertElementInst >(Inst) || isa<ShuffleVectorInst >(Inst) ||
150
+ isa<ExtractValueInst >(Inst) || isa<InsertValueInst >(Inst) ||
151
+ isa<FreezeInst>(Inst);
151
152
}
152
153
};
153
154
@@ -307,10 +308,9 @@ static unsigned getHashValueImpl(SimpleValue Val) {
307
308
IVI->getOperand (1 ),
308
309
hash_combine_range (IVI->idx_begin (), IVI->idx_end ()));
309
310
310
- assert ((isa<CallInst>(Inst) || isa<GetElementPtrInst>(Inst) ||
311
- isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
312
- isa<ShuffleVectorInst>(Inst) || isa<UnaryOperator>(Inst) ||
313
- isa<FreezeInst>(Inst)) &&
311
+ assert ((isa<CallInst>(Inst) || isa<ExtractElementInst>(Inst) ||
312
+ isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
313
+ isa<UnaryOperator>(Inst) || isa<FreezeInst>(Inst)) &&
314
314
" Invalid/unknown instruction" );
315
315
316
316
// Handle intrinsics with commutative operands.
@@ -548,11 +548,81 @@ bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
548
548
// currently executing, so conservatively return false if they are in
549
549
// different basic blocks.
550
550
if (LHSI->isConvergent () && LHSI->getParent () != RHSI->getParent ())
551
- return false ;
551
+ return false ;
552
552
553
553
return LHSI->isIdenticalTo (RHSI);
554
554
}
555
555
556
+ // ===----------------------------------------------------------------------===//
557
+ // GEPValue
558
+ // ===----------------------------------------------------------------------===//
559
+
560
+ namespace {
561
+
562
+ struct GEPValue {
563
+ Instruction *Inst;
564
+ std::optional<int64_t > ConstantOffset;
565
+
566
+ GEPValue (Instruction *I) : Inst(I) {
567
+ assert ((isSentinel () || canHandle (I)) && " Inst can't be handled!" );
568
+ }
569
+
570
+ GEPValue (Instruction *I, std::optional<int64_t > ConstantOffset)
571
+ : Inst(I), ConstantOffset(ConstantOffset) {
572
+ assert ((isSentinel () || canHandle (I)) && " Inst can't be handled!" );
573
+ }
574
+
575
+ bool isSentinel () const {
576
+ return Inst == DenseMapInfo<Instruction *>::getEmptyKey () ||
577
+ Inst == DenseMapInfo<Instruction *>::getTombstoneKey ();
578
+ }
579
+
580
+ static bool canHandle (Instruction *Inst) {
581
+ return isa<GetElementPtrInst>(Inst);
582
+ }
583
+ };
584
+
585
+ } // namespace
586
+
587
+ namespace llvm {
588
+
589
+ template <> struct DenseMapInfo <GEPValue> {
590
+ static inline GEPValue getEmptyKey () {
591
+ return DenseMapInfo<Instruction *>::getEmptyKey ();
592
+ }
593
+
594
+ static inline GEPValue getTombstoneKey () {
595
+ return DenseMapInfo<Instruction *>::getTombstoneKey ();
596
+ }
597
+
598
+ static unsigned getHashValue (const GEPValue &Val);
599
+ static bool isEqual (const GEPValue &LHS, const GEPValue &RHS);
600
+ };
601
+
602
+ } // end namespace llvm
603
+
604
+ unsigned DenseMapInfo<GEPValue>::getHashValue(const GEPValue &Val) {
605
+ auto *GEP = cast<GetElementPtrInst>(Val.Inst );
606
+ if (Val.ConstantOffset .has_value ())
607
+ return hash_combine (GEP->getOpcode (), GEP->getPointerOperand (),
608
+ Val.ConstantOffset .value ());
609
+ return hash_combine (
610
+ GEP->getOpcode (),
611
+ hash_combine_range (GEP->value_op_begin (), GEP->value_op_end ()));
612
+ }
613
+
614
+ bool DenseMapInfo<GEPValue>::isEqual(const GEPValue &LHS, const GEPValue &RHS) {
615
+ if (LHS.isSentinel () || RHS.isSentinel ())
616
+ return LHS.Inst == RHS.Inst ;
617
+ auto *LGEP = cast<GetElementPtrInst>(LHS.Inst );
618
+ auto *RGEP = cast<GetElementPtrInst>(RHS.Inst );
619
+ if (LGEP->getPointerOperand () != RGEP->getPointerOperand ())
620
+ return false ;
621
+ if (LHS.ConstantOffset .has_value () && RHS.ConstantOffset .has_value ())
622
+ return LHS.ConstantOffset .value () == RHS.ConstantOffset .value ();
623
+ return LGEP->isIdenticalToWhenDefined (RGEP);
624
+ }
625
+
556
626
// ===----------------------------------------------------------------------===//
557
627
// EarlyCSE implementation
558
628
// ===----------------------------------------------------------------------===//
@@ -647,6 +717,13 @@ class EarlyCSE {
647
717
ScopedHashTable<CallValue, std::pair<Instruction *, unsigned >>;
648
718
CallHTType AvailableCalls;
649
719
720
+ using GEPMapAllocatorTy =
721
+ RecyclingAllocator<BumpPtrAllocator,
722
+ ScopedHashTableVal<GEPValue, Value *>>;
723
+ using GEPHTType = ScopedHashTable<GEPValue, Value *, DenseMapInfo<GEPValue>,
724
+ GEPMapAllocatorTy>;
725
+ GEPHTType AvailableGEPs;
726
+
650
727
/// This is the current generation of the memory value.
651
728
unsigned CurrentGeneration = 0 ;
652
729
@@ -667,9 +744,11 @@ class EarlyCSE {
667
744
class NodeScope {
668
745
public:
669
746
NodeScope (ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
670
- InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls)
671
- : Scope(AvailableValues), LoadScope(AvailableLoads),
672
- InvariantScope (AvailableInvariants), CallScope(AvailableCalls) {}
747
+ InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
748
+ GEPHTType &AvailableGEPs)
749
+ : Scope(AvailableValues), LoadScope(AvailableLoads),
750
+ InvariantScope (AvailableInvariants), CallScope(AvailableCalls),
751
+ GEPScope(AvailableGEPs) {}
673
752
NodeScope (const NodeScope &) = delete;
674
753
NodeScope &operator =(const NodeScope &) = delete ;
675
754
@@ -678,6 +757,7 @@ class EarlyCSE {
678
757
LoadHTType::ScopeTy LoadScope;
679
758
InvariantHTType::ScopeTy InvariantScope;
680
759
CallHTType::ScopeTy CallScope;
760
+ GEPHTType::ScopeTy GEPScope;
681
761
};
682
762
683
763
// Contains all the needed information to create a stack for doing a depth
@@ -688,13 +768,13 @@ class EarlyCSE {
688
768
public:
689
769
StackNode (ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
690
770
InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
691
- unsigned cg, DomTreeNode *n, DomTreeNode::const_iterator child,
771
+ GEPHTType &AvailableGEPs, unsigned cg, DomTreeNode *n,
772
+ DomTreeNode::const_iterator child,
692
773
DomTreeNode::const_iterator end)
693
774
: CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),
694
775
EndIter (end),
695
776
Scopes(AvailableValues, AvailableLoads, AvailableInvariants,
696
- AvailableCalls)
697
- {}
777
+ AvailableCalls, AvailableGEPs) {}
698
778
StackNode (const StackNode &) = delete;
699
779
StackNode &operator =(const StackNode &) = delete ;
700
780
@@ -1214,6 +1294,20 @@ Value *EarlyCSE::getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
1214
1294
return Result;
1215
1295
}
1216
1296
1297
+ static void combineIRFlags (Instruction &From, Value *To) {
1298
+ if (auto *I = dyn_cast<Instruction>(To)) {
1299
+ // If I being poison triggers UB, there is no need to drop those
1300
+ // flags. Otherwise, only retain flags present on both I and Inst.
1301
+ // TODO: Currently some fast-math flags are not treated as
1302
+ // poison-generating even though they should. Until this is fixed,
1303
+ // always retain flags present on both I and Inst for floating point
1304
+ // instructions.
1305
+ if (isa<FPMathOperator>(I) ||
1306
+ (I->hasPoisonGeneratingFlags () && !programUndefinedIfPoison (I)))
1307
+ I->andIRFlags (&From);
1308
+ }
1309
+ }
1310
+
1217
1311
bool EarlyCSE::overridingStores (const ParseMemoryInst &Earlier,
1218
1312
const ParseMemoryInst &Later) {
1219
1313
// Can we remove Earlier store because of Later store?
@@ -1439,16 +1533,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
1439
1533
LLVM_DEBUG (dbgs () << " Skipping due to debug counter\n " );
1440
1534
continue ;
1441
1535
}
1442
- if (auto *I = dyn_cast<Instruction>(V)) {
1443
- // If I being poison triggers UB, there is no need to drop those
1444
- // flags. Otherwise, only retain flags present on both I and Inst.
1445
- // TODO: Currently some fast-math flags are not treated as
1446
- // poison-generating even though they should. Until this is fixed,
1447
- // always retain flags present on both I and Inst for floating point
1448
- // instructions.
1449
- if (isa<FPMathOperator>(I) || (I->hasPoisonGeneratingFlags () && !programUndefinedIfPoison (I)))
1450
- I->andIRFlags (&Inst);
1451
- }
1536
+ combineIRFlags (Inst, V);
1452
1537
Inst.replaceAllUsesWith (V);
1453
1538
salvageKnowledge (&Inst, &AC);
1454
1539
removeMSSA (Inst);
@@ -1561,6 +1646,31 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
1561
1646
continue ;
1562
1647
}
1563
1648
1649
+ // Compare GEP instructions based on offset.
1650
+ if (GEPValue::canHandle (&Inst)) {
1651
+ auto *GEP = cast<GetElementPtrInst>(&Inst);
1652
+ APInt Offset = APInt (SQ.DL .getIndexTypeSizeInBits (GEP->getType ()), 0 );
1653
+ GEPValue GEPVal (GEP, GEP->accumulateConstantOffset (SQ.DL , Offset)
1654
+ ? Offset.trySExtValue ()
1655
+ : std::nullopt);
1656
+ if (Value *V = AvailableGEPs.lookup (GEPVal)) {
1657
+ LLVM_DEBUG (dbgs () << " EarlyCSE CSE GEP: " << Inst << " to: " << *V
1658
+ << ' \n ' );
1659
+ combineIRFlags (Inst, V);
1660
+ Inst.replaceAllUsesWith (V);
1661
+ salvageKnowledge (&Inst, &AC);
1662
+ removeMSSA (Inst);
1663
+ Inst.eraseFromParent ();
1664
+ Changed = true ;
1665
+ ++NumCSEGEP;
1666
+ continue ;
1667
+ }
1668
+
1669
+ // Otherwise, just remember that we have this GEP.
1670
+ AvailableGEPs.insert (GEPVal, &Inst);
1671
+ continue ;
1672
+ }
1673
+
1564
1674
// A release fence requires that all stores complete before it, but does
1565
1675
// not prevent the reordering of following loads 'before' the fence. As a
1566
1676
// result, we don't need to consider it as writing to memory and don't need
@@ -1675,7 +1785,7 @@ bool EarlyCSE::run() {
1675
1785
// Process the root node.
1676
1786
nodesToProcess.push_back (new StackNode (
1677
1787
AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
1678
- CurrentGeneration, DT.getRootNode (),
1788
+ AvailableGEPs, CurrentGeneration, DT.getRootNode (),
1679
1789
DT.getRootNode ()->begin (), DT.getRootNode ()->end ()));
1680
1790
1681
1791
assert (!CurrentGeneration && " Create a new EarlyCSE instance to rerun it." );
@@ -1698,10 +1808,10 @@ bool EarlyCSE::run() {
1698
1808
} else if (NodeToProcess->childIter () != NodeToProcess->end ()) {
1699
1809
// Push the next child onto the stack.
1700
1810
DomTreeNode *child = NodeToProcess->nextChild ();
1701
- nodesToProcess.push_back (
1702
- new StackNode ( AvailableValues, AvailableLoads, AvailableInvariants,
1703
- AvailableCalls , NodeToProcess->childGeneration (),
1704
- child, child->begin (), child->end ()));
1811
+ nodesToProcess.push_back (new StackNode (
1812
+ AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls ,
1813
+ AvailableGEPs , NodeToProcess->childGeneration (), child ,
1814
+ child->begin (), child->end ()));
1705
1815
} else {
1706
1816
// It has been processed, and there are no more children to process,
1707
1817
// so delete it and pop it off the stack.
0 commit comments