diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index cb6f47e3a76be..f715e0ec8dbb4 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -216,6 +216,21 @@ class MemoryDepChecker { return MaxSafeVectorWidthInBits; } + /// Return true if there are no store-load forwarding dependencies. + bool isSafeForAnyStoreLoadForwardDistances() const { + return MaxStoreLoadForwardSafeDistanceInBits == + std::numeric_limits::max(); + } + + /// Return safe power-of-2 number of elements, which do not prevent store-load + /// forwarding, multiplied by the size of the elements in bits. + uint64_t getStoreLoadForwardSafeDistanceInBits() const { + assert(!isSafeForAnyStoreLoadForwardDistances() && + "Expected the distance, that prevent store-load forwarding, to be " + "set."); + return MaxStoreLoadForwardSafeDistanceInBits; + } + /// In same cases when the dependency check fails we can still /// vectorize the loop with a dynamic array access check. bool shouldRetryWithRuntimeCheck() const { @@ -304,6 +319,11 @@ class MemoryDepChecker { /// restrictive. uint64_t MaxSafeVectorWidthInBits = -1U; + /// Maximum power-of-2 number of elements, which do not prevent store-load + /// forwarding, multiplied by the size of the elements in bits. + uint64_t MaxStoreLoadForwardSafeDistanceInBits = + std::numeric_limits::max(); + /// If we see a non-constant dependence distance we can still try to /// vectorize this loop with runtime checks. bool FoundNonConstantDistanceDependence = false; @@ -357,7 +377,8 @@ class MemoryDepChecker { /// /// \return false if we shouldn't vectorize at all or avoid larger /// vectorization factors by limiting MinDepDistBytes. 
- bool couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize); + bool couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize, + unsigned CommonStride = 0); /// Updates the current safety status with \p S. We can go from Safe to /// either PossiblySafeWithRtChecks or Unsafe and from diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index c3a04f9373dbe..d654ac3ec9273 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -382,7 +382,8 @@ class LoopVectorizationLegality { const LoopAccessInfo *getLAI() const { return LAI; } bool isSafeForAnyVectorWidth() const { - return LAI->getDepChecker().isSafeForAnyVectorWidth(); + return LAI->getDepChecker().isSafeForAnyVectorWidth() && + LAI->getDepChecker().isSafeForAnyStoreLoadForwardDistances(); } uint64_t getMaxSafeVectorWidthInBits() const { @@ -406,6 +407,17 @@ class LoopVectorizationLegality { return hasUncountableEarlyExit() ? getUncountableEdge()->second : nullptr; } + /// Return true if there are no store-load forwarding dependencies. + bool isSafeForAnyStoreLoadForwardDistances() const { + return LAI->getDepChecker().isSafeForAnyStoreLoadForwardDistances(); + } + + /// Return safe power-of-2 number of elements, which do not prevent store-load + /// forwarding and are safe to operate on simultaneously. + uint64_t getMaxStoreLoadForwardSafeDistanceInBits() const { + return LAI->getDepChecker().getStoreLoadForwardSafeDistanceInBits(); + } + /// Returns true if vector representation of the instruction \p I /// requires mask. 
bool isMaskRequired(const Instruction *I) const { diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 57a76bc7a81e5..a530bf1d16f29 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -1741,7 +1741,8 @@ bool MemoryDepChecker::Dependence::isForward() const { } bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance, - uint64_t TypeByteSize) { + uint64_t TypeByteSize, + unsigned CommonStride) { // If loads occur at a distance that is not a multiple of a feasible vector // factor store-load forwarding does not take place. // Positive dependences might cause troubles because vectorizing them might @@ -1756,31 +1757,38 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance, // cause any slowdowns. const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize; // Maximum vector factor. - uint64_t MaxVFWithoutSLForwardIssues = std::min( - VectorizerParams::MaxVectorWidth * TypeByteSize, MinDepDistBytes); + uint64_t MaxVFWithoutSLForwardIssuesPowerOf2 = + std::min(VectorizerParams::MaxVectorWidth * TypeByteSize, + MaxStoreLoadForwardSafeDistanceInBits); // Compute the smallest VF at which the store and load would be misaligned. - for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues; - VF *= 2) { + for (uint64_t VF = 2 * TypeByteSize; + VF <= MaxVFWithoutSLForwardIssuesPowerOf2; VF *= 2) { // If the number of vector iteration between the store and the load are // small we could incur conflicts. 
if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) { - MaxVFWithoutSLForwardIssues = (VF >> 1); + MaxVFWithoutSLForwardIssuesPowerOf2 = (VF >> 1); break; } } - if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) { + if (MaxVFWithoutSLForwardIssuesPowerOf2 < 2 * TypeByteSize) { LLVM_DEBUG( dbgs() << "LAA: Distance " << Distance << " that could cause a store-load forwarding conflict\n"); return true; } - if (MaxVFWithoutSLForwardIssues < MinDepDistBytes && - MaxVFWithoutSLForwardIssues != - VectorizerParams::MaxVectorWidth * TypeByteSize) - MinDepDistBytes = MaxVFWithoutSLForwardIssues; + if (CommonStride && + MaxVFWithoutSLForwardIssuesPowerOf2 < + MaxStoreLoadForwardSafeDistanceInBits && + MaxVFWithoutSLForwardIssuesPowerOf2 != + VectorizerParams::MaxVectorWidth * TypeByteSize) { + uint64_t MaxVF = MaxVFWithoutSLForwardIssuesPowerOf2 / CommonStride; + uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8; + MaxStoreLoadForwardSafeDistanceInBits = + std::min(MaxStoreLoadForwardSafeDistanceInBits, MaxVFInBits); + } return false; } @@ -2228,20 +2236,10 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, std::min(static_cast(MinDistance), MinDepDistBytes); bool IsTrueDataDependence = (!AIsWrite && BIsWrite); - uint64_t MinDepDistBytesOld = MinDepDistBytes; if (IsTrueDataDependence && EnableForwardingConflictDetection && ConstDist && - couldPreventStoreLoadForward(MinDistance, TypeByteSize)) { - // Sanity check that we didn't update MinDepDistBytes when calling - // couldPreventStoreLoadForward - assert(MinDepDistBytes == MinDepDistBytesOld && - "An update to MinDepDistBytes requires an update to " - "MaxSafeVectorWidthInBits"); - (void)MinDepDistBytesOld; + couldPreventStoreLoadForward(MinDistance, TypeByteSize, *CommonStride)) return Dependence::BackwardVectorizableButPreventsForwarding; - } - // An update to MinDepDistBytes requires an update to MaxSafeVectorWidthInBits - // since there is a backwards dependency. 
uint64_t MaxVF = MinDepDistBytes / *CommonStride; LLVM_DEBUG(dbgs() << "LAA: Positive min distance " << MinDistance << " with max VF = " << MaxVF << '\n'); @@ -3006,6 +3004,11 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { if (!DC.isSafeForAnyVectorWidth()) OS << " with a maximum safe vector width of " << DC.getMaxSafeVectorWidthInBits() << " bits"; + if (!DC.isSafeForAnyStoreLoadForwardDistances()) { + uint64_t SLDist = DC.getStoreLoadForwardSafeDistanceInBits(); + OS << ", with a maximum safe store-load forward width of " << SLDist + << " bits"; + } if (PtrRtChecking->Need) OS << " with run-time checks"; OS << "\n"; diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 67509bd598f6a..20169c9c64f35 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3814,13 +3814,18 @@ FixedScalableVFPair LoopVectorizationCostModel::computeFeasibleMaxVF( // It is computed by MaxVF * sizeOf(type) * 8, where type is taken from // the memory accesses that is most restrictive (involved in the smallest // dependence distance). 
- unsigned MaxSafeElements = - llvm::bit_floor(Legal->getMaxSafeVectorWidthInBits() / WidestType); + unsigned MaxSafeElementsPowerOf2 = + bit_floor(Legal->getMaxSafeVectorWidthInBits() / WidestType); + if (!Legal->isSafeForAnyStoreLoadForwardDistances()) { + unsigned SLDist = Legal->getMaxStoreLoadForwardSafeDistanceInBits(); + MaxSafeElementsPowerOf2 = + std::min(MaxSafeElementsPowerOf2, SLDist / WidestType); + } + auto MaxSafeFixedVF = ElementCount::getFixed(MaxSafeElementsPowerOf2); + auto MaxSafeScalableVF = getMaxLegalScalableVF(MaxSafeElementsPowerOf2); - auto MaxSafeFixedVF = ElementCount::getFixed(MaxSafeElements); - auto MaxSafeScalableVF = getMaxLegalScalableVF(MaxSafeElements); if (!Legal->isSafeForAnyVectorWidth()) - this->MaxSafeElements = MaxSafeElements; + this->MaxSafeElements = MaxSafeElementsPowerOf2; LLVM_DEBUG(dbgs() << "LV: The max safe fixed VF is: " << MaxSafeFixedVF << ".\n"); diff --git a/llvm/test/Analysis/LoopAccessAnalysis/safe-with-dep-distance.ll b/llvm/test/Analysis/LoopAccessAnalysis/safe-with-dep-distance.ll index efa3100464759..8e249b36f6445 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/safe-with-dep-distance.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/safe-with-dep-distance.ll @@ -4,7 +4,7 @@ ; for (i = 0; i < n; i++) ; A[i + 4] = A[i] * 2; -; CHECK: Memory dependences are safe with a maximum safe vector width of 64 bits +; CHECK: Memory dependences are safe with a maximum safe vector width of 64 bits, with a maximum safe store-load forward width of 64 bits target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.10.0" diff --git a/llvm/test/Analysis/LoopAccessAnalysis/stride-access-dependence.ll b/llvm/test/Analysis/LoopAccessAnalysis/stride-access-dependence.ll index ef19e173b6599..335ad67faee04 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/stride-access-dependence.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/stride-access-dependence.ll @@ -276,7 +276,7 @@ for.body: ; 
preds = %entry, %for.body define void @vectorizable_Read_Write(ptr nocapture %A) { ; CHECK-LABEL: 'vectorizable_Read_Write' ; CHECK-NEXT: for.body: -; CHECK-NEXT: Memory dependences are safe with a maximum safe vector width of 64 bits +; CHECK-NEXT: Memory dependences are safe with a maximum safe vector width of 64 bits, with a maximum safe store-load forward width of 64 bits ; CHECK-NEXT: Dependences: ; CHECK-NEXT: BackwardVectorizable: ; CHECK-NEXT: %0 = load i32, ptr %arrayidx, align 4 ->