Skip to content

Commit b08e892

Browse files
committed
[VPlan] Consistently use (Part, 0) for first lane scalar values
At the moment, some VPInstructions create only a single scalar value, but use VPTransformState's 'vector' storage for this value. Those values are effectively uniform-per-VF (or in some cases uniform-across-VF-and-UF). Using the vector/per-part storage doesn't interact well with other recipes that more accurately use (Part, Lane) to look up scalar values, and prevents VPInstructions creating scalars from interacting with other recipes working with scalars. This PR tries to unify handling of scalars by using (Part, 0) for scalar values where only the first lane is demanded. This allows using VPInstructions with other recipes like VPScalarCastRecipe and is also needed when using VPInstructions in more cases outside the vector loop region to generate scalars. The patch is still a bit rough around the edges, but hopefully serves as a start for a discussion on how to model more scalar recipes. A potential alternative would be to split off the opcodes that generate scalars only to a separate recipe.
1 parent f4dabdf commit b08e892

File tree

5 files changed

+87
-61
lines changed

5 files changed

+87
-61
lines changed

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -213,8 +213,13 @@ VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
213213
}
214214

215215
Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) {
216-
if (Def->isLiveIn())
217-
return Def->getLiveInIRValue();
216+
if (Def->isLiveIn()) {
217+
if (Value *V = Def->getLiveInIRValue())
218+
return V;
219+
if (hasScalarValue(Def, VPIteration(0, 0))) {
220+
return Data.PerPartScalars[Def][0][0];
221+
}
222+
}
218223

219224
if (hasScalarValue(Def, Instance)) {
220225
return Data
@@ -794,7 +799,7 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
794799
// FIXME: Model VF * UF computation completely in VPlan.
795800
State.set(&VFxUF,
796801
createStepForVF(Builder, TripCountV->getType(), State.VF, State.UF),
797-
0);
802+
VPIteration(0, 0));
798803

799804
// When vectorizing the epilogue loop, the canonical induction start value
800805
// needs to be changed from zero to the value after the main vector loop.
@@ -883,8 +888,11 @@ void VPlan::execute(VPTransformState *State) {
883888

884889
for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
885890
Value *Phi = State->get(PhiR, Part);
886-
Value *Val = State->get(PhiR->getBackedgeValue(),
887-
SinglePartNeeded ? State->UF - 1 : Part);
891+
Value *Val =
892+
isa<VPCanonicalIVPHIRecipe>(PhiR)
893+
? State->get(PhiR->getBackedgeValue(), VPIteration(Part, 0))
894+
: State->get(PhiR->getBackedgeValue(),
895+
SinglePartNeeded ? State->UF - 1 : Part);
888896
cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
889897
}
890898
}

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1388,6 +1388,13 @@ class VPScalarCastRecipe : public VPSingleDefRecipe {
13881388

13891389
/// Returns the result type of the cast.
13901390
Type *getResultType() const { return ResultTy; }
1391+
1392+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
1393+
// At the moment, only scalar codegen is implemented.
1394+
assert(is_contained(operands(), Op) &&
1395+
"Op must be an operand of the recipe");
1396+
return true;
1397+
}
13911398
};
13921399

13931400
/// A recipe for widening Call instructions.

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -279,11 +279,17 @@ Value *VPInstruction::generateInstruction(VPTransformState &State,
279279
Builder.SetCurrentDebugLocation(getDebugLoc());
280280

281281
if (Instruction::isBinaryOp(getOpcode())) {
282+
bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
282283
if (Part != 0 && vputils::onlyFirstPartUsed(this))
283-
return State.get(this, 0);
284-
285-
Value *A = State.get(getOperand(0), Part);
286-
Value *B = State.get(getOperand(1), Part);
284+
return OnlyFirstLaneUsed ? State.get(this, VPIteration(0, 0))
285+
: State.get(this, 0);
286+
287+
Value *A = OnlyFirstLaneUsed
288+
? State.get(getOperand(0), VPIteration(Part, 0))
289+
: State.get(getOperand(0), Part);
290+
Value *B = OnlyFirstLaneUsed
291+
? State.get(getOperand(1), VPIteration(Part, 0))
292+
: State.get(getOperand(1), Part);
287293
auto *Res =
288294
Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
289295
if (auto *I = dyn_cast<Instruction>(Res))
@@ -385,8 +391,8 @@ Value *VPInstruction::generateInstruction(VPTransformState &State,
385391
if (Part != 0)
386392
return nullptr;
387393
// First create the compare.
388-
Value *IV = State.get(getOperand(0), Part);
389-
Value *TC = State.get(getOperand(1), Part);
394+
Value *IV = State.get(getOperand(0), VPIteration(0, 0));
395+
Value *TC = State.get(getOperand(1), VPIteration(0, 0));
390396
Value *Cond = Builder.CreateICmpEQ(IV, TC);
391397

392398
// Now create the branch.
@@ -407,7 +413,7 @@ Value *VPInstruction::generateInstruction(VPTransformState &State,
407413
}
408414
case VPInstruction::ComputeReductionResult: {
409415
if (Part != 0)
410-
return State.get(this, 0);
416+
return State.get(this, VPIteration(0, 0));
411417

412418
// FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
413419
// and will be removed by breaking up the recipe further.
@@ -512,7 +518,17 @@ void VPInstruction::execute(VPTransformState &State) {
512518
if (!hasResult())
513519
continue;
514520
assert(GeneratedValue && "generateInstruction must produce a value");
515-
State.set(this, GeneratedValue, Part);
521+
if (GeneratedValue->getType()->isVectorTy())
522+
State.set(this, GeneratedValue, Part);
523+
else {
524+
if (getOpcode() == VPInstruction::ComputeReductionResult) {
525+
State.set(this, GeneratedValue, VPIteration(Part, 0));
526+
} else {
527+
assert((State.VF.isScalar() || vputils::onlyFirstLaneUsed(this)) &&
528+
"scalar value but not only first lane used");
529+
State.set(this, GeneratedValue, VPIteration(Part, 0));
530+
}
531+
}
516532
}
517533
}
518534
bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
@@ -525,11 +541,13 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
525541
return false;
526542
case Instruction::ICmp:
527543
return vputils::onlyFirstLaneUsed(this);
544+
case VPInstruction::ComputeReductionResult:
545+
return true;
528546
case VPInstruction::ActiveLaneMask:
529547
case VPInstruction::CalculateTripCountMinusVF:
530548
case VPInstruction::CanonicalIVIncrementForPart:
531549
case VPInstruction::BranchOnCount:
532-
return getOperand(0) == Op;
550+
return true;
533551
};
534552
llvm_unreachable("switch should return");
535553
}

llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 {
9999
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP8]], i64 1025)
100100
; TFA_INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
101101
; TFA_INTERLEAVE: vector.body:
102-
; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT4:%.*]], [[VECTOR_BODY]] ]
102+
; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
103103
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
104104
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT5:%.*]], [[VECTOR_BODY]] ]
105105
; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]]
@@ -116,8 +116,7 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 {
116116
; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[TMP15]], i64 [[TMP17]]
117117
; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP13]], ptr [[TMP15]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
118118
; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP14]], ptr [[TMP18]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]])
119-
; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT:%.*]] = add i64 [[INDEX]], [[TMP6]]
120-
; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT4]] = add i64 [[INDEX]], [[TMP6]]
119+
; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
121120
; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
122121
; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 2
123122
; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = add i64 [[INDEX_NEXT]], [[TMP20]]
@@ -254,7 +253,7 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 {
254253
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP8]], i64 1025)
255254
; TFA_INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
256255
; TFA_INTERLEAVE: vector.body:
257-
; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT5:%.*]], [[VECTOR_BODY]] ]
256+
; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
258257
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
259258
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT6:%.*]], [[VECTOR_BODY]] ]
260259
; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
@@ -283,8 +282,7 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 {
283282
; TFA_INTERLEAVE-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[TMP25]], i64 [[TMP27]]
284283
; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP25]], i32 8, <vscale x 2 x i1> [[TMP23]])
285284
; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI4]], ptr [[TMP28]], i32 8, <vscale x 2 x i1> [[TMP24]])
286-
; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT:%.*]] = add i64 [[INDEX]], [[TMP6]]
287-
; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT5]] = add i64 [[INDEX]], [[TMP6]]
285+
; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
288286
; TFA_INTERLEAVE-NEXT: [[TMP29:%.*]] = call i64 @llvm.vscale.i64()
289287
; TFA_INTERLEAVE-NEXT: [[TMP30:%.*]] = mul i64 [[TMP29]], 2
290288
; TFA_INTERLEAVE-NEXT: [[TMP31:%.*]] = add i64 [[INDEX_NEXT]], [[TMP30]]
@@ -437,7 +435,7 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 {
437435
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP8]], i64 1025)
438436
; TFA_INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
439437
; TFA_INTERLEAVE: vector.body:
440-
; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT5:%.*]], [[VECTOR_BODY]] ]
438+
; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
441439
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
442440
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT6:%.*]], [[VECTOR_BODY]] ]
443441
; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
@@ -468,8 +466,7 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 {
468466
; TFA_INTERLEAVE-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[TMP27]], i64 [[TMP29]]
469467
; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP27]], i32 8, <vscale x 2 x i1> [[TMP25]])
470468
; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI4]], ptr [[TMP30]], i32 8, <vscale x 2 x i1> [[TMP26]])
471-
; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT:%.*]] = add i64 [[INDEX]], [[TMP6]]
472-
; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT5]] = add i64 [[INDEX]], [[TMP6]]
469+
; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
473470
; TFA_INTERLEAVE-NEXT: [[TMP31:%.*]] = call i64 @llvm.vscale.i64()
474471
; TFA_INTERLEAVE-NEXT: [[TMP32:%.*]] = mul i64 [[TMP31]], 2
475472
; TFA_INTERLEAVE-NEXT: [[TMP33:%.*]] = add i64 [[INDEX_NEXT]], [[TMP32]]
@@ -771,7 +768,7 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 {
771768
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP8]], i64 1025)
772769
; TFA_INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
773770
; TFA_INTERLEAVE: vector.body:
774-
; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT4:%.*]], [[VECTOR_BODY]] ]
771+
; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
775772
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
776773
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT5:%.*]], [[VECTOR_BODY]] ]
777774
; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]]
@@ -788,8 +785,7 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 {
788785
; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[TMP15]], i64 [[TMP17]]
789786
; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP13]], ptr [[TMP15]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
790787
; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP14]], ptr [[TMP18]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]])
791-
; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT:%.*]] = add i64 [[INDEX]], [[TMP6]]
792-
; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT4]] = add i64 [[INDEX]], [[TMP6]]
788+
; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
793789
; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
794790
; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 2
795791
; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = add i64 [[INDEX_NEXT]], [[TMP20]]
@@ -970,7 +966,7 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub
970966
; TFA_INTERLEAVE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x double> [[BROADCAST_SPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
971967
; TFA_INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
972968
; TFA_INTERLEAVE: vector.body:
973-
; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT4:%.*]], [[VECTOR_BODY]] ]
969+
; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
974970
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
975971
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT5:%.*]], [[VECTOR_BODY]] ]
976972
; TFA_INTERLEAVE-NEXT: [[VEC_PHI:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[TMP26:%.*]], [[VECTOR_BODY]] ]
@@ -996,8 +992,7 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub
996992
; TFA_INTERLEAVE-NEXT: [[TMP24:%.*]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[VEC_PHI]], <vscale x 2 x double> [[TMP23]])
997993
; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x double> [[TMP14]], <vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double -0.000000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer)
998994
; TFA_INTERLEAVE-NEXT: [[TMP26]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[TMP24]], <vscale x 2 x double> [[TMP25]])
999-
; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT:%.*]] = add i64 [[INDEX]], [[TMP6]]
1000-
; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT4]] = add i64 [[INDEX]], [[TMP6]]
995+
; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
1001996
; TFA_INTERLEAVE-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64()
1002997
; TFA_INTERLEAVE-NEXT: [[TMP28:%.*]] = mul i64 [[TMP27]], 2
1003998
; TFA_INTERLEAVE-NEXT: [[TMP29:%.*]] = add i64 [[INDEX_NEXT]], [[TMP28]]

0 commit comments

Comments
 (0)