@@ -88,8 +88,9 @@ struct StoreToLoadForwardingCandidate {
88
88
/// Construct a candidate from the given load and store instructions; both
/// pointers are stored as-is in the members of the same names.
StoreToLoadForwardingCandidate (LoadInst *Load, StoreInst *Store)
89
89
: Load(Load), Store(Store) {}
90
90
91
- /// Return true if the dependence from the store to the load has a
92
- /// distance of one. E.g. A[i+1] = A[i]
91
+ /// Return true if the dependence from the store to the load has an
92
+ /// absolute distance of one.
93
+ /// E.g. A[i+1] = A[i] (or A[i-1] = A[i] for a descending loop)
93
94
bool isDependenceDistanceOfOne (PredicatedScalarEvolution &PSE,
94
95
Loop *L) const {
95
96
Value *LoadPtr = Load->getPointerOperand ();
@@ -103,11 +104,19 @@ struct StoreToLoadForwardingCandidate {
103
104
DL.getTypeSizeInBits (getLoadStoreType (Store)) &&
104
105
" Should be a known dependence" );
105
106
106
- // Currently we only support accesses with unit stride. FIXME: we should be
107
- // able to handle non unit stirde as well as long as the stride is equal to
108
- // the dependence distance.
109
- if (getPtrStride (PSE, LoadType, LoadPtr, L).value_or (0 ) != 1 ||
110
- getPtrStride (PSE, LoadType, StorePtr, L).value_or (0 ) != 1 )
107
+ int64_t StrideLoad = getPtrStride (PSE, LoadType, LoadPtr, L).value_or (0 );
108
+ int64_t StrideStore = getPtrStride (PSE, LoadType, StorePtr, L).value_or (0 );
109
+ if (!StrideLoad || !StrideStore || StrideLoad != StrideStore)
110
+ return false ;
111
+
112
+ // TODO: This check for stride values other than 1 and -1 can be eliminated.
113
+ // However, doing so may cause the LoopAccessAnalysis to overcompensate,
114
+ // generating numerous non-wrap runtime checks that may undermine the
115
+ // benefits of load elimination. To safely implement support for non-unit
116
+ // strides, we would need to ensure either that the processed case does not
117
+ // require these additional checks, or improve the LAA to handle them more
118
+ // efficiently, or potentially both.
119
+ if (std::abs (StrideLoad) != 1 )
111
120
return false ;
112
121
113
122
unsigned TypeByteSize = DL.getTypeAllocSize (const_cast <Type *>(LoadType));
@@ -120,7 +129,7 @@ struct StoreToLoadForwardingCandidate {
120
129
auto *Dist = cast<SCEVConstant>(
121
130
PSE.getSE ()->getMinusSCEV (StorePtrSCEV, LoadPtrSCEV));
122
131
const APInt &Val = Dist->getAPInt ();
123
- return Val == TypeByteSize;
132
+ return Val == TypeByteSize * StrideLoad ;
124
133
}
125
134
126
135
/// Return the pointer operand of the candidate's load instruction.
Value *getLoadPtr () const { return Load->getPointerOperand (); }
0 commit comments