@@ -145,7 +145,8 @@ class MVEGatherScatterLowering : public FunctionPass {
   // Optimise the base and offsets of the given address
   bool optimiseAddress(Value *Address, BasicBlock *BB, LoopInfo *LI);
   // Try to fold consecutive geps together into one
-  Value *foldGEP(GetElementPtrInst *GEP, Value *&Offsets, IRBuilder<> &Builder);
+  Value *foldGEP(GetElementPtrInst *GEP, Value *&Offsets, unsigned &Scale,
+                 IRBuilder<> &Builder);
   // Check whether these offsets could be moved out of the loop they're in
   bool optimiseOffsets(Value *Offsets, BasicBlock *BB, LoopInfo *LI);
   // Pushes the given add out of the loop
@@ -1103,8 +1104,8 @@ bool MVEGatherScatterLowering::optimiseOffsets(Value *Offsets, BasicBlock *BB,
   return true;
 }
 
-static Value *CheckAndCreateOffsetAdd(Value *X, Value *Y, Value *GEP,
-                                      IRBuilder<> &Builder) {
+static Value *CheckAndCreateOffsetAdd(Value *X, unsigned ScaleX, Value *Y,
+                                      unsigned ScaleY, IRBuilder<> &Builder) {
   // Splat the non-vector value to a vector of the given type - if the value is
   // a constant (and its value isn't too big), we can even use this opportunity
   // to scale it to the size of the vector elements
@@ -1156,40 +1157,49 @@ static Value *CheckAndCreateOffsetAdd(Value *X, Value *Y, Value *GEP,
       ConstantInt *ConstYEl =
           dyn_cast<ConstantInt>(ConstY->getAggregateElement(i));
       if (!ConstXEl || !ConstYEl ||
-          ConstXEl->getZExtValue() + ConstYEl->getZExtValue() >=
+          ConstXEl->getZExtValue() * ScaleX +
+                  ConstYEl->getZExtValue() * ScaleY >=
              (unsigned)(1 << (TargetElemSize - 1)))
        return nullptr;
    }
  }
 
-  Value *Add = Builder.CreateAdd(X, Y);
+  Value *XScale = Builder.CreateVectorSplat(
+      XElType->getNumElements(),
+      Builder.getIntN(XElType->getScalarSizeInBits(), ScaleX));
+  Value *YScale = Builder.CreateVectorSplat(
+      YElType->getNumElements(),
+      Builder.getIntN(YElType->getScalarSizeInBits(), ScaleY));
+  Value *Add = Builder.CreateAdd(Builder.CreateMul(X, XScale),
+                                 Builder.CreateMul(Y, YScale));
 
-  FixedVectorType *GEPType = cast<FixedVectorType>(GEP->getType());
-  if (checkOffsetSize(Add, GEPType->getNumElements()))
+  if (checkOffsetSize(Add, XElType->getNumElements()))
     return Add;
   else
     return nullptr;
 }
 
 Value *MVEGatherScatterLowering::foldGEP(GetElementPtrInst *GEP,
-                                         Value *&Offsets,
+                                         Value *&Offsets, unsigned &Scale,
                                          IRBuilder<> &Builder) {
   Value *GEPPtr = GEP->getPointerOperand();
   Offsets = GEP->getOperand(1);
+  Scale = DL->getTypeAllocSize(GEP->getSourceElementType());
   // We only merge geps with constant offsets, because only for those
   // we can make sure that we do not cause an overflow
-  if (!isa<Constant>(Offsets))
+  if (GEP->getNumIndices() != 1 || !isa<Constant>(Offsets))
     return nullptr;
-  GetElementPtrInst *BaseGEP;
-  if ((BaseGEP = dyn_cast<GetElementPtrInst>(GEPPtr))) {
+  if (GetElementPtrInst *BaseGEP = dyn_cast<GetElementPtrInst>(GEPPtr)) {
     // Merge the two geps into one
-    Value *BaseBasePtr = foldGEP(BaseGEP, Offsets, Builder);
+    Value *BaseBasePtr = foldGEP(BaseGEP, Offsets, Scale, Builder);
     if (!BaseBasePtr)
       return nullptr;
-    Offsets =
-        CheckAndCreateOffsetAdd(Offsets, GEP->getOperand(1), GEP, Builder);
+    Offsets = CheckAndCreateOffsetAdd(
+        Offsets, Scale, GEP->getOperand(1),
+        DL->getTypeAllocSize(GEP->getSourceElementType()), Builder);
     if (Offsets == nullptr)
       return nullptr;
+    Scale = 1; // Scale is always an i8 at this point.
     return BaseBasePtr;
   }
   return GEPPtr;
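
The point of threading Scale through foldGEP is easiest to see on a small example. The IR below is a hypothetical illustration (not taken from this commit's tests): two chained geps whose source element types differ, as can occur directly under opaque pointers.

    %g1 = getelementptr i32, ptr %base, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
    %g2 = getelementptr i16, <4 x ptr> %g1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>

Before this change the two constant offset vectors were simply added, conflating i32 strides with i16 strides. Now the recursive foldGEP call returns %g1's offsets with Scale = 4, and CheckAndCreateOffsetAdd multiplies each side by its scale before adding, yielding byte offsets <4, 10, 16, 22> (e.g. 2*4 + 1*2 = 10), which are correct for the i8-typed merged gep emitted in optimiseAddress below.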
@@ -1206,15 +1216,24 @@ bool MVEGatherScatterLowering::optimiseAddress(Value *Address, BasicBlock *BB,
     Builder.SetInsertPoint(GEP);
     Builder.SetCurrentDebugLocation(GEP->getDebugLoc());
     Value *Offsets;
-    Value *Base = foldGEP(GEP, Offsets, Builder);
+    unsigned Scale;
+    Value *Base = foldGEP(GEP, Offsets, Scale, Builder);
     // We only want to merge the geps if there is a real chance that they can be
     // used by an MVE gather; thus the offset has to have the correct size
     // (always i32 if it is not of vector type) and the base has to be a
     // pointer.
     if (Offsets && Base && Base != GEP) {
+      assert(Scale == 1 && "Expected to fold GEP to a scale of 1");
+      Type *BaseTy = Builder.getInt8PtrTy();
+      if (auto *VecTy = dyn_cast<FixedVectorType>(Base->getType()))
+        BaseTy = FixedVectorType::get(BaseTy, VecTy);
       GetElementPtrInst *NewAddress = GetElementPtrInst::Create(
-          GEP->getSourceElementType(), Base, Offsets, "gep.merged", GEP);
-      GEP->replaceAllUsesWith(NewAddress);
+          Builder.getInt8Ty(), Builder.CreateBitCast(Base, BaseTy), Offsets,
+          "gep.merged", GEP);
+      LLVM_DEBUG(dbgs() << "Folded GEP: " << *GEP
+                        << "\n     new :  " << *NewAddress << "\n");
+      GEP->replaceAllUsesWith(
+          Builder.CreateBitCast(NewAddress, GEP->getType()));
       GEP = NewAddress;
       Changed = true;
     }
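
Continuing the sketch above (again hypothetical output, modulo value names), the rewritten optimiseAddress would now produce:

    %gep.merged = getelementptr i8, ptr %base, <4 x i32> <i32 4, i32 10, i32 16, i32 22>

Because the merged gep is always i8-typed, foldGEP resets Scale to 1 after a successful merge, which the new assert checks. The surrounding CreateBitCast calls are no-ops whenever the source and destination types already match (IRBuilder returns the original value), so under opaque pointers no cast instructions are actually emitted.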