Skip to content

Commit b3b23dd

Browse files
fhahn authored and GeorgeARM committed
[VPlan] Don't rely on region check in isUniformAfterVectorization. (llvm#137883)
Generalize isUniformAfterVectorization check to not rely on the region, but purely work on checking operands and opcodes. This will be needed when dissolving the vector region (llvm#117506) and improves codegen slightly in some cases. PR: llvm#137883
1 parent 899517a commit b3b23dd

7 files changed

+61
-38
lines changed

llvm/lib/Transforms/Vectorize/VPlanUtils.h

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -39,19 +39,46 @@ const SCEV *getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE);
3939

4040
/// Returns true if \p VPV is uniform after vectorization.
4141
inline bool isUniformAfterVectorization(const VPValue *VPV) {
42-
// A value defined outside the vector region must be uniform after
43-
// vectorization inside a vector region.
44-
if (VPV->isDefinedOutsideLoopRegions())
42+
auto PreservesUniformity = [](unsigned Opcode) -> bool {
43+
if (Instruction::isBinaryOp(Opcode) || Instruction::isCast(Opcode))
44+
return true;
45+
switch (Opcode) {
46+
case Instruction::GetElementPtr:
47+
case Instruction::ICmp:
48+
case Instruction::FCmp:
49+
case VPInstruction::Broadcast:
50+
case VPInstruction::PtrAdd:
51+
return true;
52+
default:
53+
return false;
54+
}
55+
};
56+
57+
// A live-in must be uniform across the scope of VPlan.
58+
if (VPV->isLiveIn())
4559
return true;
46-
if (auto *Rep = dyn_cast<VPReplicateRecipe>(VPV))
47-
return Rep->isUniform();
60+
61+
if (auto *Rep = dyn_cast<VPReplicateRecipe>(VPV)) {
62+
const VPRegionBlock *RegionOfR = Rep->getParent()->getParent();
63+
// Don't consider recipes in replicate regions as uniform yet; their first
64+
// lane cannot be accessed when executing the replicate region for other
65+
// lanes.
66+
if (RegionOfR && RegionOfR->isReplicator())
67+
return false;
68+
return Rep->isUniform() ||
69+
(PreservesUniformity(Rep->getOpcode()) &&
70+
all_of(Rep->operands(), isUniformAfterVectorization));
71+
}
4872
if (isa<VPWidenGEPRecipe, VPDerivedIVRecipe, VPBlendRecipe>(VPV))
4973
return all_of(VPV->getDefiningRecipe()->operands(),
5074
isUniformAfterVectorization);
75+
if (auto *WidenR = dyn_cast<VPWidenRecipe>(VPV)) {
76+
return PreservesUniformity(WidenR->getOpcode()) &&
77+
all_of(WidenR->operands(), isUniformAfterVectorization);
78+
}
5179
if (auto *VPI = dyn_cast<VPInstruction>(VPV))
5280
return VPI->isSingleScalar() || VPI->isVectorToScalar() ||
53-
((Instruction::isBinaryOp(VPI->getOpcode()) ||
54-
VPI->getOpcode() == VPInstruction::PtrAdd) &&
81+
(PreservesUniformity(VPI->getOpcode()) &&
5582
all_of(VPI->operands(), isUniformAfterVectorization));
5683

5784
// VPExpandSCEVRecipes must be placed in the entry and are always uniform.

llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -557,21 +557,21 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt
557557
; DEFAULT-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP8]], i32 1
558558
; DEFAULT-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF32:.*]], label %[[PRED_STORE_CONTINUE33:.*]]
559559
; DEFAULT: [[PRED_STORE_IF32]]:
560-
; DEFAULT-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP6]], i32 1
560+
; DEFAULT-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0
561561
; DEFAULT-NEXT: store i32 [[TMP13]], ptr [[E]], align 4, !alias.scope [[META14]], !noalias [[META16]]
562562
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE33]]
563563
; DEFAULT: [[PRED_STORE_CONTINUE33]]:
564564
; DEFAULT-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP8]], i32 2
565565
; DEFAULT-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF34:.*]], label %[[PRED_STORE_CONTINUE35:.*]]
566566
; DEFAULT: [[PRED_STORE_IF34]]:
567-
; DEFAULT-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP6]], i32 2
567+
; DEFAULT-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0
568568
; DEFAULT-NEXT: store i32 [[TMP15]], ptr [[E]], align 4, !alias.scope [[META14]], !noalias [[META16]]
569569
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE35]]
570570
; DEFAULT: [[PRED_STORE_CONTINUE35]]:
571571
; DEFAULT-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP8]], i32 3
572572
; DEFAULT-NEXT: br i1 [[TMP21]], label %[[PRED_STORE_IF36:.*]], label %[[PRED_STORE_CONTINUE37]]
573573
; DEFAULT: [[PRED_STORE_IF36]]:
574-
; DEFAULT-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP6]], i32 3
574+
; DEFAULT-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0
575575
; DEFAULT-NEXT: store i32 [[TMP22]], ptr [[E]], align 4, !alias.scope [[META14]], !noalias [[META16]]
576576
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE37]]
577577
; DEFAULT: [[PRED_STORE_CONTINUE37]]:

llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,12 +74,12 @@ define void @ld_div2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
7474
; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 [[INDEX]], 2
7575
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
7676
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
77-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i64 0
77+
; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP2]], 42
78+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0
7879
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
79-
; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
8080
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
8181
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
82-
; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[TMP5]], align 8
82+
; CHECK-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8
8383
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
8484
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
8585
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]

llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -74,12 +74,12 @@ define void @ld_and_neg2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
7474
; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[INDEX]], -2
7575
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
7676
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8
77-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0
77+
; CHECK-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP3]], 42
78+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP4]], i64 0
7879
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
79-
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
8080
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
8181
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
82-
; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr [[TMP6]], align 8
82+
; CHECK-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8
8383
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
8484
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
8585
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -269,15 +269,11 @@ define void @ld_and_neg2_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
269269
; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0]], 1
270270
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
271271
; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
272-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP4]], i64 0
273-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
274-
; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
272+
; CHECK-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], 42
275273
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
276274
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
277-
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
278-
; CHECK-NEXT: store i64 [[TMP8]], ptr [[TMP6]], align 8
279-
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
280-
; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP7]], align 8
275+
; CHECK-NEXT: store i64 [[TMP5]], ptr [[TMP6]], align 8
276+
; CHECK-NEXT: store i64 [[TMP5]], ptr [[TMP7]], align 8
281277
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
282278
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
283279
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]

llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -290,12 +290,12 @@ define void @ld_div8_urem3(ptr noalias %A, ptr noalias %B) {
290290
; CHECK-NEXT: [[TMP2:%.*]] = urem i64 [[TMP1]], 3
291291
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
292292
; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
293-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TMP4]], i64 0
293+
; CHECK-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], 42
294+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TMP5]], i64 0
294295
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
295-
; CHECK-NEXT: [[TMP5:%.*]] = add nsw <8 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
296296
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
297297
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0
298-
; CHECK-NEXT: store <8 x i64> [[TMP5]], ptr [[TMP7]], align 8
298+
; CHECK-NEXT: store <8 x i64> [[BROADCAST_SPLAT]], ptr [[TMP7]], align 8
299299
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
300300
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
301301
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]

llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -113,12 +113,12 @@ define void @ld_lshr1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
113113
; VF2-NEXT: [[TMP0:%.*]] = lshr i64 [[INDEX]], 1
114114
; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
115115
; VF2-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
116-
; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i64 0
116+
; VF2-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP2]], 42
117+
; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0
117118
; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
118-
; VF2-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
119119
; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
120120
; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
121-
; VF2-NEXT: store <2 x i64> [[TMP3]], ptr [[TMP5]], align 8
121+
; VF2-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8
122122
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
123123
; VF2-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
124124
; VF2-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -225,12 +225,12 @@ define void @ld_lshr2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
225225
; VF2-NEXT: [[TMP1:%.*]] = lshr i64 [[INDEX]], 2
226226
; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
227227
; VF2-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8
228-
; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0
228+
; VF2-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP3]], 42
229+
; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP4]], i64 0
229230
; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
230-
; VF2-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
231231
; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
232232
; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
233-
; VF2-NEXT: store <2 x i64> [[TMP4]], ptr [[TMP6]], align 8
233+
; VF2-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8
234234
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
235235
; VF2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
236236
; VF2-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -264,12 +264,12 @@ define void @ld_lshr2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
264264
; VF4-NEXT: [[TMP1:%.*]] = lshr i64 [[INDEX]], 2
265265
; VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
266266
; VF4-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8
267-
; VF4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP3]], i64 0
267+
; VF4-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP3]], 42
268+
; VF4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP4]], i64 0
268269
; VF4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
269-
; VF4-NEXT: [[TMP4:%.*]] = add nsw <4 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
270270
; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
271271
; VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
272-
; VF4-NEXT: store <4 x i64> [[TMP4]], ptr [[TMP6]], align 8
272+
; VF4-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8
273273
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
274274
; VF4-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
275275
; VF4-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]

llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -157,12 +157,12 @@ define void @ld_div2_step1_start0_ind2(ptr noalias %A, ptr noalias %B) {
157157
; VF2-NEXT: [[TMP4:%.*]] = add i64 [[TMP2]], [[TMP3]]
158158
; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
159159
; VF2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8
160-
; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i64 0
160+
; VF2-NEXT: [[TMP7:%.*]] = add nsw i64 [[TMP6]], 42
161+
; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i64 0
161162
; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
162-
; VF2-NEXT: [[TMP7:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
163163
; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
164164
; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0
165-
; VF2-NEXT: store <2 x i64> [[TMP7]], ptr [[TMP9]], align 8
165+
; VF2-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP9]], align 8
166166
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
167167
; VF2-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
168168
; VF2-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]

0 commit comments

Comments
 (0)