Skip to content

Commit 873c3f7

Browse files
committed
Revert "[SLP]Remove operands upon marking instruction for deletion."
This reverts commit bbd52dd to fix a crash revealed in https://lab.llvm.org/buildbot/#/builders/4/builds/505
1 parent fd524d4 commit 873c3f7

21 files changed: +70 additions, −93 deletions

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 16 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1163,12 +1163,6 @@ class BoUpSLP {
11631163
return VectorizableTree.front()->Scalars;
11641164
}
11651165

1166-
/// Checks if the root graph node can be emitted with narrower bitwidth at
1167-
/// codegen and returns it signedness, if so.
1168-
bool isSignedMinBitwidthRootNode() const {
1169-
return MinBWs.at(VectorizableTree.front().get()).second;
1170-
}
1171-
11721166
/// Builds external uses of the vectorized scalars, i.e. the list of
11731167
/// vectorized scalars to be extracted, their lanes and their scalar users. \p
11741168
/// ExternallyUsedValues contains additional list of external uses to handle
@@ -2436,21 +2430,6 @@ class BoUpSLP {
24362430
DeletedInstructions.insert(I);
24372431
}
24382432

2439-
/// Clear the operands of \p I, marking for deletion trivially dead operands.
2440-
void clearOperands(Instruction *I, const TreeEntry *Entry = nullptr) {
2441-
for (unsigned Idx : seq<unsigned>(I->getNumOperands())) {
2442-
// Ignore pointer operand of stores to keep correct DIAssignID.
2443-
if (isa<StoreInst>(I) && Idx == 1)
2444-
continue;
2445-
Value *Op = I->getOperand(Idx);
2446-
I->setOperand(Idx, PoisonValue::get(Op->getType()));
2447-
if (auto *OpI = dyn_cast<Instruction>(Op))
2448-
if (!isDeleted(OpI) && isInstructionTriviallyDead(OpI, TLI) &&
2449-
(!Entry || Entry->VectorizedValue != OpI))
2450-
eraseInstruction(OpI);
2451-
}
2452-
}
2453-
24542433
/// Checks if the instruction was already analyzed for being possible
24552434
/// reduction root.
24562435
bool isAnalyzedReductionRoot(Instruction *I) const {
@@ -3816,7 +3795,7 @@ class BoUpSLP {
38163795

38173796
/// Performs the "real" scheduling. Done before vectorization is actually
38183797
/// performed in a basic block.
3819-
void scheduleBlock(BlockScheduling *BS, BoUpSLP &R);
3798+
void scheduleBlock(BlockScheduling *BS);
38203799

38213800
/// List of users to ignore during scheduling and that don't need extracting.
38223801
const SmallDenseSet<Value *> *UserIgnoreList = nullptr;
@@ -13545,7 +13524,7 @@ Value *BoUpSLP::vectorizeTree(
1354513524
Instruction *ReductionRoot) {
1354613525
// All blocks must be scheduled before any instructions are inserted.
1354713526
for (auto &BSIter : BlocksSchedules) {
13548-
scheduleBlock(BSIter.second.get(), *this);
13527+
scheduleBlock(BSIter.second.get());
1354913528
}
1355013529
// Clean Entry-to-LastInstruction table. It can be affected after scheduling,
1355113530
// need to rebuild it.
@@ -14085,14 +14064,11 @@ Value *BoUpSLP::vectorizeTree(
1408514064
}
1408614065
#endif
1408714066
LLVM_DEBUG(dbgs() << "SLP: \tErasing scalar:" << *Scalar << ".\n");
14088-
auto *I = cast<Instruction>(Scalar);
14089-
// Clear the operands, marking for deletion trivially dead operands.
14090-
clearOperands(I, Entry);
14091-
eraseInstruction(I);
14067+
eraseInstruction(cast<Instruction>(Scalar));
1409214068
// Retain to-be-deleted instructions for some debug-info
1409314069
// bookkeeping. NOTE: eraseInstruction only marks the instruction for
1409414070
// deletion - instructions are not deleted until later.
14095-
RemovedInsts.push_back(I);
14071+
RemovedInsts.push_back(cast<Instruction>(Scalar));
1409614072
}
1409714073
}
1409814074

@@ -14705,8 +14681,6 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
1470514681

1470614682
for (; DepDest; DepDest = DepDest->NextLoadStore) {
1470714683
assert(isInSchedulingRegion(DepDest));
14708-
if (SLP->isDeleted(DepDest->Inst))
14709-
continue;
1471014684

1471114685
// We have two limits to reduce the complexity:
1471214686
// 1) AliasedCheckLimit: It's a small limit to reduce calls to
@@ -14776,7 +14750,7 @@ void BoUpSLP::BlockScheduling::resetSchedule() {
1477614750
ReadyInsts.clear();
1477714751
}
1477814752

14779-
void BoUpSLP::scheduleBlock(BlockScheduling *BS, BoUpSLP &R) {
14753+
void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
1478014754
if (!BS->ScheduleStart)
1478114755
return;
1478214756

@@ -14833,8 +14807,6 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS, BoUpSLP &R) {
1483314807
for (ScheduleData *BundleMember = Picked; BundleMember;
1483414808
BundleMember = BundleMember->NextInBundle) {
1483514809
Instruction *PickedInst = BundleMember->Inst;
14836-
if (R.isDeleted(PickedInst))
14837-
continue;
1483814810
if (PickedInst->getNextNonDebugInstruction() != LastScheduledInst)
1483914811
PickedInst->moveAfter(LastScheduledInst->getPrevNode());
1484014812
LastScheduledInst = PickedInst;
@@ -17372,11 +17344,14 @@ class HorizontalReduction {
1737217344
Value *ReducedSubTree =
1737317345
emitReduction(VectorizedRoot, Builder, ReduxWidth, TTI);
1737417346
if (ReducedSubTree->getType() != VL.front()->getType()) {
17375-
assert(ReducedSubTree->getType() != VL.front()->getType() &&
17376-
"Expected different reduction type.");
17377-
ReducedSubTree =
17378-
Builder.CreateIntCast(ReducedSubTree, VL.front()->getType(),
17379-
V.isSignedMinBitwidthRootNode());
17347+
ReducedSubTree = Builder.CreateIntCast(
17348+
ReducedSubTree, VL.front()->getType(), any_of(VL, [&](Value *R) {
17349+
KnownBits Known = computeKnownBits(
17350+
R, cast<Instruction>(ReductionOps.front().front())
17351+
->getModule()
17352+
->getDataLayout());
17353+
return !Known.isNonNegative();
17354+
}));
1738017355
}
1738117356

1738217357
// Improved analysis for add/fadd/xor reductions with same scale factor
@@ -17538,13 +17513,10 @@ class HorizontalReduction {
1753817513
}
1753917514
#endif
1754017515
if (!Ignore->use_empty()) {
17541-
Value *P = PoisonValue::get(Ignore->getType());
17542-
Ignore->replaceAllUsesWith(P);
17516+
Value *Undef = UndefValue::get(Ignore->getType());
17517+
Ignore->replaceAllUsesWith(Undef);
1754317518
}
17544-
auto *I = cast<Instruction>(Ignore);
17545-
// Clear the operands, marking for deletion trivially dead operands.
17546-
V.clearOperands(I);
17547-
V.eraseInstruction(I);
17519+
V.eraseInstruction(cast<Instruction>(Ignore));
1754817520
}
1754917521
}
1755017522
} else if (!CheckForReusedReductionOps) {

llvm/test/Transforms/SLPVectorizer/X86/arith-add-ssat.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -503,10 +503,10 @@ define void @add_v64i8() {
503503
; SSE-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
504504
; SSE-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
505505
; SSE-NEXT: [[TMP9:%.*]] = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]])
506-
; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
507506
; SSE-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
508507
; SSE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
509508
; SSE-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]])
509+
; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
510510
; SSE-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
511511
; SSE-NEXT: ret void
512512
;
@@ -522,10 +522,10 @@ define void @add_v64i8() {
522522
; SLM-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
523523
; SLM-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
524524
; SLM-NEXT: [[TMP9:%.*]] = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]])
525-
; SLM-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
526525
; SLM-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
527526
; SLM-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
528527
; SLM-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]])
528+
; SLM-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
529529
; SLM-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
530530
; SLM-NEXT: ret void
531531
;

llvm/test/Transforms/SLPVectorizer/X86/arith-add-usat.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -401,10 +401,10 @@ define void @add_v64i8() {
401401
; SSE-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
402402
; SSE-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
403403
; SSE-NEXT: [[TMP9:%.*]] = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]])
404-
; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
405404
; SSE-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
406405
; SSE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
407406
; SSE-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]])
407+
; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
408408
; SSE-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
409409
; SSE-NEXT: ret void
410410
;

llvm/test/Transforms/SLPVectorizer/X86/arith-add.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -439,10 +439,10 @@ define void @add_v64i8() {
439439
; SSE-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
440440
; SSE-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
441441
; SSE-NEXT: [[TMP9:%.*]] = add <16 x i8> [[TMP7]], [[TMP8]]
442-
; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
443442
; SSE-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
444443
; SSE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
445444
; SSE-NEXT: [[TMP12:%.*]] = add <16 x i8> [[TMP10]], [[TMP11]]
445+
; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
446446
; SSE-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
447447
; SSE-NEXT: ret void
448448
;
@@ -458,10 +458,10 @@ define void @add_v64i8() {
458458
; SLM-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
459459
; SLM-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
460460
; SLM-NEXT: [[TMP9:%.*]] = add <16 x i8> [[TMP7]], [[TMP8]]
461-
; SLM-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
462461
; SLM-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
463462
; SLM-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
464463
; SLM-NEXT: [[TMP12:%.*]] = add <16 x i8> [[TMP10]], [[TMP11]]
464+
; SLM-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
465465
; SLM-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
466466
; SLM-NEXT: ret void
467467
;

llvm/test/Transforms/SLPVectorizer/X86/arith-fix.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -520,10 +520,10 @@ define void @smul_v64i8() {
520520
; SSE-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
521521
; SSE-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
522522
; SSE-NEXT: [[TMP9:%.*]] = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]], i32 3)
523-
; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
524523
; SSE-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
525524
; SSE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
526525
; SSE-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]], i32 3)
526+
; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
527527
; SSE-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
528528
; SSE-NEXT: ret void
529529
;
@@ -539,10 +539,10 @@ define void @smul_v64i8() {
539539
; SLM-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
540540
; SLM-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
541541
; SLM-NEXT: [[TMP9:%.*]] = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]], i32 3)
542-
; SLM-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
543542
; SLM-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
544543
; SLM-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
545544
; SLM-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]], i32 3)
545+
; SLM-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
546546
; SLM-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
547547
; SLM-NEXT: ret void
548548
;
@@ -1323,10 +1323,10 @@ define void @umul_v64i8() {
13231323
; SSE-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
13241324
; SSE-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
13251325
; SSE-NEXT: [[TMP9:%.*]] = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]], i32 3)
1326-
; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
13271326
; SSE-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
13281327
; SSE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
13291328
; SSE-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]], i32 3)
1329+
; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
13301330
; SSE-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
13311331
; SSE-NEXT: ret void
13321332
;
@@ -1342,10 +1342,10 @@ define void @umul_v64i8() {
13421342
; SLM-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
13431343
; SLM-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
13441344
; SLM-NEXT: [[TMP9:%.*]] = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]], i32 3)
1345-
; SLM-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
13461345
; SLM-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
13471346
; SLM-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
13481347
; SLM-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]], i32 3)
1348+
; SLM-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
13491349
; SLM-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
13501350
; SLM-NEXT: ret void
13511351
;

llvm/test/Transforms/SLPVectorizer/X86/arith-fshl-rot.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -480,10 +480,10 @@ define void @fshl_v64i8() {
480480
; SSE-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
481481
; SSE-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
482482
; SSE-NEXT: [[TMP9:%.*]] = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP7]], <16 x i8> [[TMP8]])
483-
; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 32), align 1
484483
; SSE-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
485484
; SSE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
486485
; SSE-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP10]], <16 x i8> [[TMP11]])
486+
; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 32), align 1
487487
; SSE-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 48), align 1
488488
; SSE-NEXT: ret void
489489
;

0 commit comments

Comments (0)