Skip to content

Commit 78777a2

Browse files
[LV]Split store-load forward distance analysis from other checks, NFC (#121156)
This patch splits the store-load forwarding distance analysis from the other dependency analyses in LAA. Currently it supports only power-of-2 distances; the split is required to support non-power-of-2 distances in the future. Part of #100755
1 parent f82283a commit 78777a2

File tree

6 files changed

+72
-31
lines changed

6 files changed

+72
-31
lines changed

llvm/include/llvm/Analysis/LoopAccessAnalysis.h

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,21 @@ class MemoryDepChecker {
216216
return MaxSafeVectorWidthInBits;
217217
}
218218

219+
/// Return true if there are no store-load forwarding dependencies.
220+
bool isSafeForAnyStoreLoadForwardDistances() const {
221+
return MaxStoreLoadForwardSafeDistanceInBits ==
222+
std::numeric_limits<uint64_t>::max();
223+
}
224+
225+
/// Return safe power-of-2 number of elements, which do not prevent store-load
226+
/// forwarding, multiplied by the size of the elements in bits.
227+
uint64_t getStoreLoadForwardSafeDistanceInBits() const {
228+
assert(!isSafeForAnyStoreLoadForwardDistances() &&
229+
"Expected the distance, that prevent store-load forwarding, to be "
230+
"set.");
231+
return MaxStoreLoadForwardSafeDistanceInBits;
232+
}
233+
219234
/// In some cases when the dependency check fails we can still
220235
/// vectorize the loop with a dynamic array access check.
221236
bool shouldRetryWithRuntimeCheck() const {
@@ -304,6 +319,11 @@ class MemoryDepChecker {
304319
/// restrictive.
305320
uint64_t MaxSafeVectorWidthInBits = -1U;
306321

322+
/// Maximum power-of-2 number of elements, which do not prevent store-load
323+
/// forwarding, multiplied by the size of the elements in bits.
324+
uint64_t MaxStoreLoadForwardSafeDistanceInBits =
325+
std::numeric_limits<uint64_t>::max();
326+
307327
/// If we see a non-constant dependence distance we can still try to
308328
/// vectorize this loop with runtime checks.
309329
bool FoundNonConstantDistanceDependence = false;
@@ -357,7 +377,8 @@ class MemoryDepChecker {
357377
///
358378
/// \return true if we shouldn't vectorize at all, or false after avoiding
358378
/// larger vectorization factors by limiting MaxStoreLoadForwardSafeDistanceInBits.
360-
bool couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize);
380+
bool couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize,
381+
unsigned CommonStride = 0);
361382

362383
/// Updates the current safety status with \p S. We can go from Safe to
363384
/// either PossiblySafeWithRtChecks or Unsafe and from

llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -382,7 +382,8 @@ class LoopVectorizationLegality {
382382
const LoopAccessInfo *getLAI() const { return LAI; }
383383

384384
bool isSafeForAnyVectorWidth() const {
385-
return LAI->getDepChecker().isSafeForAnyVectorWidth();
385+
return LAI->getDepChecker().isSafeForAnyVectorWidth() &&
386+
LAI->getDepChecker().isSafeForAnyStoreLoadForwardDistances();
386387
}
387388

388389
uint64_t getMaxSafeVectorWidthInBits() const {
@@ -406,6 +407,17 @@ class LoopVectorizationLegality {
406407
return hasUncountableEarlyExit() ? getUncountableEdge()->second : nullptr;
407408
}
408409

410+
/// Return true if there are no store-load forwarding dependencies.
411+
bool isSafeForAnyStoreLoadForwardDistances() const {
412+
return LAI->getDepChecker().isSafeForAnyStoreLoadForwardDistances();
413+
}
414+
415+
/// Return safe power-of-2 number of elements, which do not prevent store-load
416+
/// forwarding and are safe to operate on simultaneously, expressed in bits.
417+
uint64_t getMaxStoreLoadForwardSafeDistanceInBits() const {
418+
return LAI->getDepChecker().getStoreLoadForwardSafeDistanceInBits();
419+
}
420+
409421
/// Returns true if vector representation of the instruction \p I
410422
/// requires mask.
411423
bool isMaskRequired(const Instruction *I) const {

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 25 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1740,7 +1740,8 @@ bool MemoryDepChecker::Dependence::isForward() const {
17401740
}
17411741

17421742
bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
1743-
uint64_t TypeByteSize) {
1743+
uint64_t TypeByteSize,
1744+
unsigned CommonStride) {
17441745
// If loads occur at a distance that is not a multiple of a feasible vector
17451746
// factor, store-load forwarding does not take place.
17461747
// Positive dependences might cause troubles because vectorizing them might
@@ -1755,31 +1756,38 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
17551756
// cause any slowdowns.
17561757
const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
17571758
// Maximum vector factor.
1758-
uint64_t MaxVFWithoutSLForwardIssues = std::min(
1759-
VectorizerParams::MaxVectorWidth * TypeByteSize, MinDepDistBytes);
1759+
uint64_t MaxVFWithoutSLForwardIssuesPowerOf2 =
1760+
std::min(VectorizerParams::MaxVectorWidth * TypeByteSize,
1761+
MaxStoreLoadForwardSafeDistanceInBits);
17601762

17611763
// Compute the smallest VF at which the store and load would be misaligned.
1762-
for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
1763-
VF *= 2) {
1764+
for (uint64_t VF = 2 * TypeByteSize;
1765+
VF <= MaxVFWithoutSLForwardIssuesPowerOf2; VF *= 2) {
17641766
// If the number of vector iterations between the store and the load is
17651767
// small we could incur conflicts.
17661768
if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
1767-
MaxVFWithoutSLForwardIssues = (VF >> 1);
1769+
MaxVFWithoutSLForwardIssuesPowerOf2 = (VF >> 1);
17681770
break;
17691771
}
17701772
}
17711773

1772-
if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
1774+
if (MaxVFWithoutSLForwardIssuesPowerOf2 < 2 * TypeByteSize) {
17731775
LLVM_DEBUG(
17741776
dbgs() << "LAA: Distance " << Distance
17751777
<< " that could cause a store-load forwarding conflict\n");
17761778
return true;
17771779
}
17781780

1779-
if (MaxVFWithoutSLForwardIssues < MinDepDistBytes &&
1780-
MaxVFWithoutSLForwardIssues !=
1781-
VectorizerParams::MaxVectorWidth * TypeByteSize)
1782-
MinDepDistBytes = MaxVFWithoutSLForwardIssues;
1781+
if (CommonStride &&
1782+
MaxVFWithoutSLForwardIssuesPowerOf2 <
1783+
MaxStoreLoadForwardSafeDistanceInBits &&
1784+
MaxVFWithoutSLForwardIssuesPowerOf2 !=
1785+
VectorizerParams::MaxVectorWidth * TypeByteSize) {
1786+
uint64_t MaxVF = MaxVFWithoutSLForwardIssuesPowerOf2 / CommonStride;
1787+
uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
1788+
MaxStoreLoadForwardSafeDistanceInBits =
1789+
std::min(MaxStoreLoadForwardSafeDistanceInBits, MaxVFInBits);
1790+
}
17831791
return false;
17841792
}
17851793

@@ -2227,20 +2235,10 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
22272235
std::min(static_cast<uint64_t>(MinDistance), MinDepDistBytes);
22282236

22292237
bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
2230-
uint64_t MinDepDistBytesOld = MinDepDistBytes;
22312238
if (IsTrueDataDependence && EnableForwardingConflictDetection && ConstDist &&
2232-
couldPreventStoreLoadForward(MinDistance, TypeByteSize)) {
2233-
// Sanity check that we didn't update MinDepDistBytes when calling
2234-
// couldPreventStoreLoadForward
2235-
assert(MinDepDistBytes == MinDepDistBytesOld &&
2236-
"An update to MinDepDistBytes requires an update to "
2237-
"MaxSafeVectorWidthInBits");
2238-
(void)MinDepDistBytesOld;
2239+
couldPreventStoreLoadForward(MinDistance, TypeByteSize, *CommonStride))
22392240
return Dependence::BackwardVectorizableButPreventsForwarding;
2240-
}
22412241

2242-
// An update to MinDepDistBytes requires an update to MaxSafeVectorWidthInBits
2243-
// since there is a backwards dependency.
22442242
uint64_t MaxVF = MinDepDistBytes / *CommonStride;
22452243
LLVM_DEBUG(dbgs() << "LAA: Positive min distance " << MinDistance
22462244
<< " with max VF = " << MaxVF << '\n');
@@ -3005,6 +3003,11 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
30053003
if (!DC.isSafeForAnyVectorWidth())
30063004
OS << " with a maximum safe vector width of "
30073005
<< DC.getMaxSafeVectorWidthInBits() << " bits";
3006+
if (!DC.isSafeForAnyStoreLoadForwardDistances()) {
3007+
uint64_t SLDist = DC.getStoreLoadForwardSafeDistanceInBits();
3008+
OS << ", with a maximum safe store-load forward width of " << SLDist
3009+
<< " bits";
3010+
}
30083011
if (PtrRtChecking->Need)
30093012
OS << " with run-time checks";
30103013
OS << "\n";

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3815,13 +3815,18 @@ FixedScalableVFPair LoopVectorizationCostModel::computeFeasibleMaxVF(
38153815
// It is computed by MaxVF * sizeOf(type) * 8, where type is taken from
38163816
// the memory access that is most restrictive (involved in the smallest
38173817
// dependence distance).
3818-
unsigned MaxSafeElements =
3819-
llvm::bit_floor(Legal->getMaxSafeVectorWidthInBits() / WidestType);
3818+
unsigned MaxSafeElementsPowerOf2 =
3819+
bit_floor(Legal->getMaxSafeVectorWidthInBits() / WidestType);
3820+
if (!Legal->isSafeForAnyStoreLoadForwardDistances()) {
3821+
unsigned SLDist = Legal->getMaxStoreLoadForwardSafeDistanceInBits();
3822+
MaxSafeElementsPowerOf2 =
3823+
std::min(MaxSafeElementsPowerOf2, SLDist / WidestType);
3824+
}
3825+
auto MaxSafeFixedVF = ElementCount::getFixed(MaxSafeElementsPowerOf2);
3826+
auto MaxSafeScalableVF = getMaxLegalScalableVF(MaxSafeElementsPowerOf2);
38203827

3821-
auto MaxSafeFixedVF = ElementCount::getFixed(MaxSafeElements);
3822-
auto MaxSafeScalableVF = getMaxLegalScalableVF(MaxSafeElements);
38233828
if (!Legal->isSafeForAnyVectorWidth())
3824-
this->MaxSafeElements = MaxSafeElements;
3829+
this->MaxSafeElements = MaxSafeElementsPowerOf2;
38253830

38263831
LLVM_DEBUG(dbgs() << "LV: The max safe fixed VF is: " << MaxSafeFixedVF
38273832
<< ".\n");

llvm/test/Analysis/LoopAccessAnalysis/safe-with-dep-distance.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
; for (i = 0; i < n; i++)
55
; A[i + 4] = A[i] * 2;
66

7-
; CHECK: Memory dependences are safe with a maximum safe vector width of 64 bits
7+
; CHECK: Memory dependences are safe with a maximum safe vector width of 64 bits, with a maximum safe store-load forward width of 64 bits
88

99
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
1010
target triple = "x86_64-apple-macosx10.10.0"

llvm/test/Analysis/LoopAccessAnalysis/stride-access-dependence.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,7 @@ for.body: ; preds = %entry, %for.body
276276
define void @vectorizable_Read_Write(ptr nocapture %A) {
277277
; CHECK-LABEL: 'vectorizable_Read_Write'
278278
; CHECK-NEXT: for.body:
279-
; CHECK-NEXT: Memory dependences are safe with a maximum safe vector width of 64 bits
279+
; CHECK-NEXT: Memory dependences are safe with a maximum safe vector width of 64 bits, with a maximum safe store-load forward width of 64 bits
280280
; CHECK-NEXT: Dependences:
281281
; CHECK-NEXT: BackwardVectorizable:
282282
; CHECK-NEXT: %0 = load i32, ptr %arrayidx, align 4 ->

0 commit comments

Comments
 (0)