Skip to content

Commit bbff5b8

Browse files
committed
[VPlan] Use alloc-type to compute interleave group offset.
Use getTypeAllocSize to compute the offset to the start of interleave groups instead of getScalarSizeInBits, which may return 0 for pointers. This is in line with the analysis building the interleave groups and fixes a mis-compile reported for #106431.
1 parent 484c027 commit bbff5b8

File tree

2 files changed

+3
-2
lines changed

2 files changed

+3
-2
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -1646,8 +1646,9 @@ void VPlanTransforms::createInterleaveGroups(
16461646
// zero.
16471647
assert(IG->getIndex(IRInsertPos) != 0 &&
16481648
"index of insert position shouldn't be zero");
1649+
auto &DL = IRInsertPos->getDataLayout();
16491650
APInt Offset(32,
1650-
getLoadStoreType(IRInsertPos)->getScalarSizeInBits() / 8 *
1651+
DL.getTypeAllocSize(getLoadStoreType(IRInsertPos)) *
16511652
IG->getIndex(IRInsertPos),
16521653
/*IsSigned=*/true);
16531654
VPValue *OffsetVPV = Plan.getOrAddLiveIn(

llvm/test/Transforms/LoopVectorize/interleaved-accesses-different-insert-position.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ define i64 @interleave_group_load_pointer_type(ptr %start, ptr %end) {
183183
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0
184184
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP6]]
185185
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 16
186-
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP7]], i32 0
186+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP7]], i32 -8
187187
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x ptr>, ptr [[TMP8]], align 8
188188
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x ptr> [[WIDE_VEC]], <12 x ptr> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
189189
; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <12 x ptr> [[WIDE_VEC]], <12 x ptr> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>

0 commit comments

Comments
 (0)