diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 55090a9b1efe..2a6987848ca5 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -70,16 +70,17 @@ using namespace llvm; #define DEBUG_TYPE "loop-accesses" static cl::opt - VectorizationFactor("force-vector-width", cl::Hidden, - cl::desc("Sets the SIMD width. Zero is autoselect."), - cl::location(VectorizerParams::VectorizationFactor)); +VectorizationFactor("force-vector-width", cl::Hidden, + cl::desc("Sets the SIMD width. Zero is autoselect."), + cl::location(VectorizerParams::VectorizationFactor)); unsigned VectorizerParams::VectorizationFactor; -static cl::opt VectorizationInterleave( - "force-vector-interleave", cl::Hidden, - cl::desc("Sets the vectorization interleave count. " - "Zero is autoselect."), - cl::location(VectorizerParams::VectorizationInterleave)); +static cl::opt +VectorizationInterleave("force-vector-interleave", cl::Hidden, + cl::desc("Sets the vectorization interleave count. " + "Zero is autoselect."), + cl::location( + VectorizerParams::VectorizationInterleave)); unsigned VectorizerParams::VectorizationInterleave; static cl::opt RuntimeMemoryCheckThreshold( @@ -389,7 +390,10 @@ void RuntimePointerChecking::groupChecks( // equivalence class, the iteration order is deterministic. for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end(); MI != ME; ++MI) { - unsigned Pointer = PositionMap[MI->getPointer()]; + auto PointerI = PositionMap.find(MI->getPointer()); + assert(PointerI != PositionMap.end() && + "pointer in equivalence class not found in PositionMap"); + unsigned Pointer = PointerI->second; bool Merged = false; // Mark this pointer as seen. Seen.insert(Pointer); @@ -501,10 +505,10 @@ class AccessAnalysis { typedef PointerIntPair MemAccessInfo; typedef SmallVector MemAccessInfoList; - AccessAnalysis(const DataLayout &Dl, Loop *TheLoop, AAResults *AA, - LoopInfo *LI, MemoryDepChecker::DepCandidates &DA, + AccessAnalysis(Loop *TheLoop, AAResults *AA, LoopInfo *LI, + MemoryDepChecker::DepCandidates &DA, PredicatedScalarEvolution &PSE) - : DL(Dl), TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA), + : TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA), IsRTCheckAnalysisNeeded(false), PSE(PSE) {} /// Register a load and whether it is only read from. @@ -535,7 +539,8 @@ class AccessAnalysis { const ValueToValueMap &Strides, DenseMap &DepSetId, Loop *TheLoop, unsigned &RunningDepId, - unsigned ASId, bool ShouldCheckStride, bool Assume); + unsigned ASId, bool ShouldCheckStride, + bool Assume); /// Check whether we can check the pointers at runtime for /// non-intersection. @@ -548,7 +553,9 @@ class AccessAnalysis { /// Goes over all memory accesses, checks whether a RT check is needed /// and builds sets of dependent accesses. - void buildDependenceSets() { processMemAccesses(); } + void buildDependenceSets() { + processMemAccesses(); + } /// Initial processing of memory accesses determined that we need to /// perform dependency checking. @@ -575,8 +582,6 @@ class AccessAnalysis { /// Set of all accesses. PtrAccessSet Accesses; - const DataLayout &DL; - /// The loop being checked. const Loop *TheLoop; @@ -584,10 +589,10 @@ class AccessAnalysis { MemAccessInfoList CheckDeps; /// Set of pointers that are read only. - SmallPtrSet ReadOnlyPtr; + SmallPtrSet ReadOnlyPtr; /// An alias set tracker to partition the access set by underlying object and - // intrinsic property (such as TBAA metadata). + //intrinsic property (such as TBAA metadata). AliasSetTracker AST; LoopInfo *LI; @@ -688,7 +693,7 @@ bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck, LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n'); return true; -} + } bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE, Loop *TheLoop, @@ -699,8 +704,7 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, bool CanDoRT = true; bool MayNeedRTCheck = false; - if (!IsRTCheckAnalysisNeeded) - return true; + if (!IsRTCheckAnalysisNeeded) return true; bool IsDepCheckNeeded = isDependencyCheckNeeded(); @@ -720,53 +724,55 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, SmallVector Retries; - for (auto A : AS) { + // First, count how many write and read accesses are in the alias set. Also + // collect MemAccessInfos for later. + SmallVector AccessInfos; + for (const auto &A : AS) { Value *Ptr = A.getValue(); bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true)); - MemAccessInfo Access(Ptr, IsWrite); if (IsWrite) ++NumWritePtrChecks; else ++NumReadPtrChecks; + AccessInfos.emplace_back(Ptr, IsWrite); + } + + // We do not need runtime checks for this alias set, if there are no writes + // or a single write and no reads. + if (NumWritePtrChecks == 0 || + (NumWritePtrChecks == 1 && NumReadPtrChecks == 0)) { + assert((AS.size() <= 1 || + all_of(AS, + [this](auto AC) { + MemAccessInfo AccessWrite(AC.getValue(), true); + return DepCands.findValue(AccessWrite) == DepCands.end(); + })) && + "Can only skip updating CanDoRT below, if all entries in AS " + "are reads or there is at most 1 entry"); + continue; + } + for (auto &Access : AccessInfos) { if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop, RunningDepId, ASId, ShouldCheckWrap, false)) { - LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Ptr << '\n'); + LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" + << *Access.getPointer() << '\n'); Retries.push_back(Access); CanDoAliasSetRT = false; } } - // If we have at least two writes or one write and a read then we need to - // check them. But there is no need to checks if there is only one - // dependence set for this alias set. - // // Note that this function computes CanDoRT and MayNeedRTCheck // independently. For example CanDoRT=false, MayNeedRTCheck=false means that // we have a pointer for which we couldn't find the bounds but we don't // actually need to emit any checks so it does not matter. - bool NeedsAliasSetRTCheck = false; - if (!(IsDepCheckNeeded && CanDoAliasSetRT && RunningDepId == 2)) { - NeedsAliasSetRTCheck = - (NumWritePtrChecks >= 2 || - (NumReadPtrChecks >= 1 && NumWritePtrChecks >= 1)); - // For alias sets without at least 2 writes or 1 write and 1 read, there - // is no need to generate RT checks and CanDoAliasSetRT for this alias set - // does not impact whether runtime checks can be generated. - if (!NeedsAliasSetRTCheck) { - assert((AS.size() <= 1 || - all_of(AS, - [this](auto AC) { - MemAccessInfo AccessWrite(AC.getValue(), true); - return DepCands.findValue(AccessWrite) == - DepCands.end(); - })) && - "Can only skip updating CanDoRT below, if all entries in AS " - "are reads or there is at most 1 entry"); - continue; - } - } + // + // We need runtime checks for this alias set, if there are at least 2 + // dependence sets (in which case RunningDepId > 2) or if we need to re-try + // any bound checks (because in that case the number of dependence sets is + // incomplete). + bool NeedsAliasSetRTCheck = RunningDepId > 2 || !Retries.empty(); // We need to perform run-time alias checks, but some pointers had bounds // that couldn't be checked. @@ -777,8 +783,8 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, CanDoAliasSetRT = true; for (auto Access : Retries) if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, - TheLoop, RunningDepId, ASId, ShouldCheckWrap, - /*Assume=*/true)) { + TheLoop, RunningDepId, ASId, + ShouldCheckWrap, /*Assume=*/true)) { CanDoAliasSetRT = false; break; } @@ -800,7 +806,7 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, // Only need to check pointers between two different dependency sets. if (RtCheck.Pointers[i].DependencySetId == RtCheck.Pointers[j].DependencySetId) - continue; + continue; // Only need to check pointers in the same alias set. if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId) continue; @@ -846,18 +852,16 @@ void AccessAnalysis::processMemAccesses() { LLVM_DEBUG(dbgs() << "LAA: Accesses(" << Accesses.size() << "):\n"); LLVM_DEBUG({ for (auto A : Accesses) - dbgs() << "\t" << *A.getPointer() << " (" - << (A.getInt() ? "write" - : (ReadOnlyPtr.count(A.getPointer()) ? "read-only" - : "read")) - << ")\n"; + dbgs() << "\t" << *A.getPointer() << " (" << + (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ? + "read-only" : "read")) << ")\n"; }); // The AliasSetTracker has nicely partitioned our pointers by metadata // compatibility and potential for underlying-object overlap. As a result, we // only need to check for potential pointer dependencies within each alias // set. - for (auto &AS : AST) { + for (const auto &AS : AST) { // Note that both the alias-set tracker and the alias sets themselves used // linked lists internally and so the iteration order here is deterministic // (matching the original instruction order within each set). @@ -865,7 +869,7 @@ void AccessAnalysis::processMemAccesses() { bool SetHasWrite = false; // Map of pointers to last access encountered. - typedef DenseMap UnderlyingObjToAccessMap; + typedef DenseMap UnderlyingObjToAccessMap; UnderlyingObjToAccessMap ObjToLastAccess; // Set of access to check after all writes have been processed. @@ -877,12 +881,12 @@ void AccessAnalysis::processMemAccesses() { bool UseDeferred = SetIteration > 0; PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses; - for (auto AV : AS) { + for (const auto &AV : AS) { Value *Ptr = AV.getValue(); // For a single memory access in AliasSetTracker, Accesses may contain // both read and write, and they both need to be handled for CheckDeps. - for (auto AC : S) { + for (const auto &AC : S) { if (AC.getPointer() != Ptr) continue; @@ -1051,10 +1055,9 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, // to access the pointer value "0" which is undefined behavior in address // space 0, therefore we can also vectorize this case. bool IsInBoundsGEP = isInBoundsGep(Ptr); - bool IsNoWrapAddRec = - !ShouldCheckWrap || - PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) || - isNoWrapAddRec(Ptr, AR, PSE, Lp); + bool IsNoWrapAddRec = !ShouldCheckWrap || + PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) || + isNoWrapAddRec(Ptr, AR, PSE, Lp); if (!IsNoWrapAddRec && !IsInBoundsGEP && NullPointerIsDefined(Lp->getHeader()->getParent(), PtrTy->getAddressSpace())) { @@ -1450,7 +1453,7 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B, unsigned BIdx, const ValueToValueMap &Strides) { - assert(AIdx < BIdx && "Must pass arguments in program order"); + assert (AIdx < BIdx && "Must pass arguments in program order"); Value *APtr = A.getPointer(); Value *BPtr = B.getPointer(); @@ -1492,7 +1495,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, // Need accesses with constant stride. We don't want to vectorize // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in // the address space. - if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr) { + if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){ LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n"); return Dependence::Unknown; } @@ -1559,12 +1562,10 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, } // Bail out early if passed-in parameters make vectorization not feasible. - unsigned ForcedFactor = (VectorizerParams::VectorizationFactor - ? VectorizerParams::VectorizationFactor - : 1); - unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave - ? VectorizerParams::VectorizationInterleave - : 1); + unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ? + VectorizerParams::VectorizationFactor : 1); + unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ? + VectorizerParams::VectorizationInterleave : 1); // The minimum number of iterations for a vectorized/unrolled version. unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U); @@ -1653,7 +1654,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets, // Get the relevant memory access set. EquivalenceClasses::iterator I = - AccessSets.findValue(AccessSets.getLeaderValue(CurAccess)); + AccessSets.findValue(AccessSets.getLeaderValue(CurAccess)); // Check accesses within this set. EquivalenceClasses::member_iterator AI = @@ -1672,8 +1673,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets, while (OI != AE) { // Check every accessing instruction pair in program order. for (std::vector::iterator I1 = Accesses[*AI].begin(), - I1E = Accesses[*AI].end(); - I1 != I1E; ++I1) + I1E = Accesses[*AI].end(); I1 != I1E; ++I1) // Scan all accesses of another equivalence class, but only the next // accesses of the same equivalent class. for (std::vector::iterator @@ -1725,19 +1725,15 @@ MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const { auto &IndexVector = Accesses.find(Access)->second; SmallVector Insts; - transform(IndexVector, std::back_inserter(Insts), - [&](unsigned Idx) { return this->InstMap[Idx]; }); + transform(IndexVector, + std::back_inserter(Insts), + [&](unsigned Idx) { return this->InstMap[Idx]; }); return Insts; } const char *MemoryDepChecker::Dependence::DepName[] = { - "NoDep", - "Unknown", - "Forward", - "ForwardButPreventsForwarding", - "Backward", - "BackwardVectorizable", - "BackwardVectorizableButPreventsForwarding"}; + "NoDep", "Unknown", "Forward", "ForwardButPreventsForwarding", "Backward", + "BackwardVectorizable", "BackwardVectorizableButPreventsForwarding"}; void MemoryDepChecker::Dependence::print( raw_ostream &OS, unsigned Depth, @@ -1784,7 +1780,7 @@ bool LoopAccessInfo::canAnalyzeLoop() { void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, const TargetLibraryInfo *TLI, DominatorTree *DT) { - typedef SmallPtrSet ValueSet; + typedef SmallPtrSet ValueSet; // Holds the Load and Store instructions. SmallVector Loads; @@ -1849,7 +1845,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, auto *Ld = dyn_cast(&I); if (!Ld) { recordAnalysis("CantVectorizeInstruction", Ld) - << "instruction cannot be vectorized"; + << "instruction cannot be vectorized"; HasComplexMemInst = true; continue; } @@ -1891,7 +1887,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, collectStridedAccess(St); } } // Next instr. - } // Next block. + } // Next block. if (HasComplexMemInst) { CanVecMem = false; @@ -1910,8 +1906,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, } MemoryDepChecker::DepCandidates DependentAccesses; - AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(), - TheLoop, AA, LI, DependentAccesses, *PSE); + AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE); // Holds the analyzed pointers. We don't want to call getUnderlyingObjects // multiple times on the same object. If the ptr is accessed twice, once @@ -1972,8 +1967,8 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, IsReadOnlyPtr = true; } - // See if there is an unsafe dependency between a load to a uniform address - // and store to the same uniform address. + // See if there is an unsafe dependency between a load to a uniform address and + // store to the same uniform address. if (UniformStores.count(Ptr)) { LLVM_DEBUG(dbgs() << "LAA: Found an unsafe dependency between a uniform " "load and uniform store to the same address!\n"); @@ -2015,8 +2010,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, } LLVM_DEBUG( - dbgs() - << "LAA: May be able to perform a memory runtime check if needed.\n"); + dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n"); CanVecMem = true; if (Accesses.isDependencyCheckNeeded()) { @@ -2053,7 +2047,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, if (HasConvergentOp) { recordAnalysis("CantInsertRuntimeCheckWithConvergent") - << "cannot add control dependency to convergent operation"; + << "cannot add control dependency to convergent operation"; LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check " "would be needed with a convergent operation\n"); CanVecMem = false; @@ -2076,11 +2070,11 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, } bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, - DominatorTree *DT) { + DominatorTree *DT) { assert(TheLoop->contains(BB) && "Unknown block used"); // Blocks that do not dominate the latch need predication. - BasicBlock *Latch = TheLoop->getLoopLatch(); + BasicBlock* Latch = TheLoop->getLoopLatch(); return !DT->dominates(BB, Latch); } @@ -2099,8 +2093,8 @@ OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName, DL = I->getDebugLoc(); } - Report = std::make_unique(DEBUG_TYPE, RemarkName, - DL, CodeRegion); + Report = std::make_unique(DEBUG_TYPE, RemarkName, DL, + CodeRegion); return *Report; } @@ -2157,8 +2151,7 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) { CastedStride = SE->getNoopOrSignExtend(StrideExpr, BETakenCount->getType()); else CastedBECount = SE->getZeroExtendExpr(BETakenCount, StrideExpr->getType()); - const SCEV *StrideMinusBETaken = - SE->getMinusSCEV(CastedStride, CastedBECount); + const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount); // Since TripCount == BackEdgeTakenCount + 1, checking: // "Stride >= TripCount" is equivalent to checking: // Stride - BETakenCount > 0 @@ -2267,12 +2260,12 @@ bool LoopAccessLegacyAnalysis::runOnFunction(Function &F) { } void LoopAccessLegacyAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); - AU.setPreservesAll(); + AU.setPreservesAll(); } char LoopAccessLegacyAnalysis::ID = 0; @@ -2295,6 +2288,8 @@ LoopAccessInfo LoopAccessAnalysis::run(Loop &L, LoopAnalysisManager &AM, namespace llvm { -Pass *createLAAPass() { return new LoopAccessLegacyAnalysis(); } + Pass *createLAAPass() { + return new LoopAccessLegacyAnalysis(); + } } // end namespace llvm