Skip to content

Commit d4f38d8

Browse files
committed
[SROA] Only try additional vector type candidates when needed
Change-Id: I06f3026b616ddc03d09ec6c416ad4cc15d837d96
1 parent: f21673b; commit: d4f38d8

File tree

2 files changed

+87
-0
lines changed

2 files changed

+87
-0
lines changed

llvm/lib/Transforms/Scalar/SROA.cpp

+7
Original file line number | Diff line number | Diff line change
@@ -2289,6 +2289,12 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
22892289
if (S.beginOffset() == P.beginOffset() && S.endOffset() == P.endOffset())
22902290
CheckCandidateType(Ty);
22912291
}
2292+
2293+
if (auto *VTy = checkVectorTypesForPromotion(
2294+
P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
2295+
HaveCommonVecPtrTy, CommonVecPtrTy))
2296+
return VTy;
2297+
22922298
// Consider additional vector types where the element type size is a
22932299
// multiple of load/store element size.
22942300
for (Type *Ty : LoadStoreTys) {
@@ -2298,6 +2304,7 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
22982304
// Make a copy of CandidateTys and iterate through it, because we might
22992305
// append to CandidateTys in the loop.
23002306
SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
2307+
CandidateTys.clear();
23012308
for (VectorType *&VTy : CandidateTysCopy) {
23022309
unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
23032310
unsigned ElementSize =

llvm/test/Transforms/SROA/vector-promotion.ll

+80
Original file line number | Diff line number | Diff line change
@@ -1227,6 +1227,86 @@ define void @swap-15bytes(ptr %x, ptr %y) {
12271227
ret void
12281228
}
12291229

1230+
define <4 x i32> @ptrLoadStoreTys(ptr %init, i32 %val2) {
1231+
; CHECK-LABEL: @ptrLoadStoreTys(
1232+
; CHECK-NEXT: [[VAL0:%.*]] = load ptr, ptr [[INIT:%.*]], align 8
1233+
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[VAL0]] to i64
1234+
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to <2 x i32>
1235+
; CHECK-NEXT: [[OBJ_0_VEC_EXPAND:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
1236+
; CHECK-NEXT: [[OBJ_0_VECBLEND:%.*]] = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i32> [[OBJ_0_VEC_EXPAND]], <4 x i32> zeroinitializer
1237+
; CHECK-NEXT: [[OBJ_8_VEC_INSERT:%.*]] = insertelement <4 x i32> [[OBJ_0_VECBLEND]], i32 [[VAL2:%.*]], i32 2
1238+
; CHECK-NEXT: [[OBJ_12_VEC_INSERT:%.*]] = insertelement <4 x i32> [[OBJ_8_VEC_INSERT]], i32 131072, i32 3
1239+
; CHECK-NEXT: ret <4 x i32> [[OBJ_12_VEC_INSERT]]
1240+
;
1241+
; DEBUG-LABEL: @ptrLoadStoreTys(
1242+
; DEBUG-NEXT: [[VAL0:%.*]] = load ptr, ptr [[INIT:%.*]], align 8, !dbg [[DBG492:![0-9]+]]
1243+
; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr [[VAL0]], metadata [[META487:![0-9]+]], metadata !DIExpression()), !dbg [[DBG492]]
1244+
; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META488:![0-9]+]], metadata !DIExpression()), !dbg [[DBG493:![0-9]+]]
1245+
; DEBUG-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[VAL0]] to i64, !dbg [[DBG494:![0-9]+]]
1246+
; DEBUG-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to <2 x i32>, !dbg [[DBG494]]
1247+
; DEBUG-NEXT: [[OBJ_0_VEC_EXPAND:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>, !dbg [[DBG494]]
1248+
; DEBUG-NEXT: [[OBJ_0_VECBLEND:%.*]] = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i32> [[OBJ_0_VEC_EXPAND]], <4 x i32> zeroinitializer, !dbg [[DBG494]]
1249+
; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META489:![0-9]+]], metadata !DIExpression()), !dbg [[DBG495:![0-9]+]]
1250+
; DEBUG-NEXT: [[OBJ_8_VEC_INSERT:%.*]] = insertelement <4 x i32> [[OBJ_0_VECBLEND]], i32 [[VAL2:%.*]], i32 2, !dbg [[DBG496:![0-9]+]]
1251+
; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META490:![0-9]+]], metadata !DIExpression()), !dbg [[DBG497:![0-9]+]]
1252+
; DEBUG-NEXT: [[OBJ_12_VEC_INSERT:%.*]] = insertelement <4 x i32> [[OBJ_8_VEC_INSERT]], i32 131072, i32 3, !dbg [[DBG498:![0-9]+]]
1253+
; DEBUG-NEXT: call void @llvm.dbg.value(metadata <4 x i32> [[OBJ_12_VEC_INSERT]], metadata [[META491:![0-9]+]], metadata !DIExpression()), !dbg [[DBG499:![0-9]+]]
1254+
; DEBUG-NEXT: ret <4 x i32> [[OBJ_12_VEC_INSERT]], !dbg [[DBG500:![0-9]+]]
1255+
;
1256+
%val0 = load ptr, ptr %init, align 8
1257+
%obj = alloca <4 x i32>, align 16
1258+
store <4 x i32> zeroinitializer, ptr %obj, align 16
1259+
store ptr %val0, ptr %obj, align 8
1260+
%ptr2 = getelementptr inbounds i8, ptr %obj, i64 8
1261+
store i32 %val2, ptr %ptr2, align 4
1262+
%ptr3 = getelementptr inbounds i8, ptr %obj, i64 12
1263+
store i32 131072, ptr %ptr3, align 4
1264+
%sroaval = load <4 x i32>, ptr %obj, align 16
1265+
ret <4 x i32> %sroaval
1266+
}
1267+
1268+
define <4 x float> @ptrLoadStoreTysFloat(ptr %init, float %val2) {
1269+
; CHECK-LABEL: @ptrLoadStoreTysFloat(
1270+
; CHECK-NEXT: [[VAL0:%.*]] = load ptr, ptr [[INIT:%.*]], align 8
1271+
; CHECK-NEXT: [[OBJ:%.*]] = alloca <4 x float>, align 16
1272+
; CHECK-NEXT: store <4 x float> zeroinitializer, ptr [[OBJ]], align 16
1273+
; CHECK-NEXT: store ptr [[VAL0]], ptr [[OBJ]], align 16
1274+
; CHECK-NEXT: [[OBJ_8_PTR2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[OBJ]], i64 8
1275+
; CHECK-NEXT: store float [[VAL2:%.*]], ptr [[OBJ_8_PTR2_SROA_IDX]], align 8
1276+
; CHECK-NEXT: [[OBJ_12_PTR3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[OBJ]], i64 12
1277+
; CHECK-NEXT: store float 1.310720e+05, ptr [[OBJ_12_PTR3_SROA_IDX]], align 4
1278+
; CHECK-NEXT: [[OBJ_0_SROAVAL:%.*]] = load <4 x float>, ptr [[OBJ]], align 16
1279+
; CHECK-NEXT: ret <4 x float> [[OBJ_0_SROAVAL]]
1280+
;
1281+
; DEBUG-LABEL: @ptrLoadStoreTysFloat(
1282+
; DEBUG-NEXT: [[VAL0:%.*]] = load ptr, ptr [[INIT:%.*]], align 8, !dbg [[DBG508:![0-9]+]]
1283+
; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr [[VAL0]], metadata [[META503:![0-9]+]], metadata !DIExpression()), !dbg [[DBG508]]
1284+
; DEBUG-NEXT: [[OBJ:%.*]] = alloca <4 x float>, align 16, !dbg [[DBG509:![0-9]+]]
1285+
; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr [[OBJ]], metadata [[META504:![0-9]+]], metadata !DIExpression()), !dbg [[DBG509]]
1286+
; DEBUG-NEXT: store <4 x float> zeroinitializer, ptr [[OBJ]], align 16, !dbg [[DBG510:![0-9]+]]
1287+
; DEBUG-NEXT: store ptr [[VAL0]], ptr [[OBJ]], align 16, !dbg [[DBG511:![0-9]+]]
1288+
; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META505:![0-9]+]], metadata !DIExpression()), !dbg [[DBG512:![0-9]+]]
1289+
; DEBUG-NEXT: [[OBJ_8_PTR2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[OBJ]], i64 8, !dbg [[DBG513:![0-9]+]]
1290+
; DEBUG-NEXT: store float [[VAL2:%.*]], ptr [[OBJ_8_PTR2_SROA_IDX]], align 8, !dbg [[DBG513]]
1291+
; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META506:![0-9]+]], metadata !DIExpression()), !dbg [[DBG514:![0-9]+]]
1292+
; DEBUG-NEXT: [[OBJ_12_PTR3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[OBJ]], i64 12, !dbg [[DBG515:![0-9]+]]
1293+
; DEBUG-NEXT: store float 1.310720e+05, ptr [[OBJ_12_PTR3_SROA_IDX]], align 4, !dbg [[DBG515]]
1294+
; DEBUG-NEXT: [[OBJ_0_SROAVAL:%.*]] = load <4 x float>, ptr [[OBJ]], align 16, !dbg [[DBG516:![0-9]+]]
1295+
; DEBUG-NEXT: call void @llvm.dbg.value(metadata <4 x float> [[OBJ_0_SROAVAL]], metadata [[META507:![0-9]+]], metadata !DIExpression()), !dbg [[DBG516]]
1296+
; DEBUG-NEXT: ret <4 x float> [[OBJ_0_SROAVAL]], !dbg [[DBG517:![0-9]+]]
1297+
;
1298+
%val0 = load ptr, ptr %init, align 8
1299+
%obj = alloca <4 x float>, align 16
1300+
store <4 x float> zeroinitializer, ptr %obj, align 16
1301+
store ptr %val0, ptr %obj, align 8
1302+
%ptr2 = getelementptr inbounds i8, ptr %obj, i64 8
1303+
store float %val2, ptr %ptr2, align 4
1304+
%ptr3 = getelementptr inbounds i8, ptr %obj, i64 12
1305+
store float 131072.0, ptr %ptr3, align 4
1306+
%sroaval = load <4 x float>, ptr %obj, align 16
1307+
ret <4 x float> %sroaval
1308+
}
1309+
12301310
declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
12311311
declare void @llvm.lifetime.end.p0(i64, ptr)
12321312
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:

0 commit comments

Comments
 (0)