Skip to content

Commit 71e4987

Browse files
d0k authored and llvmbot committed
Revert "[SLP]Alternate vectorization for cmp instructions."
This reverts commit 83620bd. It's causing miscompilations; see the review comments at https://reviews.llvm.org/D115955. (cherry picked from commit 0c3d22a)
1 parent 5410d0a commit 71e4987

File tree

5 files changed

+251
-367
lines changed

5 files changed

+251
-367
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 6 additions & 173 deletions
Original file line number | Diff line number | Diff line change
@@ -471,36 +471,17 @@ static bool isValidForAlternation(unsigned Opcode) {
471471
return true;
472472
}
473473

474-
static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
475-
unsigned BaseIndex = 0);
476-
477-
/// Checks if the provided operands of 2 cmp instructions are compatible, i.e.
478-
/// compatible instructions or constants, or just some other regular values.
479-
static bool areCompatibleCmpOps(Value *BaseOp0, Value *BaseOp1, Value *Op0,
480-
Value *Op1) {
481-
return (isConstant(BaseOp0) && isConstant(Op0)) ||
482-
(isConstant(BaseOp1) && isConstant(Op1)) ||
483-
(!isa<Instruction>(BaseOp0) && !isa<Instruction>(Op0) &&
484-
!isa<Instruction>(BaseOp1) && !isa<Instruction>(Op1)) ||
485-
getSameOpcode({BaseOp0, Op0}).getOpcode() ||
486-
getSameOpcode({BaseOp1, Op1}).getOpcode();
487-
}
488-
489474
/// \returns analysis of the Instructions in \p VL described in
490475
/// InstructionsState, the Opcode that we suppose the whole list
491476
/// could be vectorized even if its structure is diverse.
492477
static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
493-
unsigned BaseIndex) {
478+
unsigned BaseIndex = 0) {
494479
// Make sure these are all Instructions.
495480
if (llvm::any_of(VL, [](Value *V) { return !isa<Instruction>(V); }))
496481
return InstructionsState(VL[BaseIndex], nullptr, nullptr);
497482

498483
bool IsCastOp = isa<CastInst>(VL[BaseIndex]);
499484
bool IsBinOp = isa<BinaryOperator>(VL[BaseIndex]);
500-
bool IsCmpOp = isa<CmpInst>(VL[BaseIndex]);
501-
CmpInst::Predicate BasePred =
502-
IsCmpOp ? cast<CmpInst>(VL[BaseIndex])->getPredicate()
503-
: CmpInst::BAD_ICMP_PREDICATE;
504485
unsigned Opcode = cast<Instruction>(VL[BaseIndex])->getOpcode();
505486
unsigned AltOpcode = Opcode;
506487
unsigned AltIndex = BaseIndex;
@@ -533,57 +514,6 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
533514
continue;
534515
}
535516
}
536-
} else if (IsCmpOp && isa<CmpInst>(VL[Cnt])) {
537-
auto *BaseInst = cast<Instruction>(VL[BaseIndex]);
538-
auto *Inst = cast<Instruction>(VL[Cnt]);
539-
Type *Ty0 = BaseInst->getOperand(0)->getType();
540-
Type *Ty1 = Inst->getOperand(0)->getType();
541-
if (Ty0 == Ty1) {
542-
Value *BaseOp0 = BaseInst->getOperand(0);
543-
Value *BaseOp1 = BaseInst->getOperand(1);
544-
Value *Op0 = Inst->getOperand(0);
545-
Value *Op1 = Inst->getOperand(1);
546-
CmpInst::Predicate CurrentPred =
547-
cast<CmpInst>(VL[Cnt])->getPredicate();
548-
CmpInst::Predicate SwappedCurrentPred =
549-
CmpInst::getSwappedPredicate(CurrentPred);
550-
// Check for compatible operands. If the corresponding operands are not
551-
// compatible - need to perform alternate vectorization.
552-
if (InstOpcode == Opcode) {
553-
if (BasePred == CurrentPred &&
554-
areCompatibleCmpOps(BaseOp0, BaseOp1, Op0, Op1))
555-
continue;
556-
if (BasePred == SwappedCurrentPred &&
557-
areCompatibleCmpOps(BaseOp0, BaseOp1, Op1, Op0))
558-
continue;
559-
if (E == 2 &&
560-
(BasePred == CurrentPred || BasePred == SwappedCurrentPred))
561-
continue;
562-
auto *AltInst = cast<CmpInst>(VL[AltIndex]);
563-
CmpInst::Predicate AltPred = AltInst->getPredicate();
564-
Value *AltOp0 = AltInst->getOperand(0);
565-
Value *AltOp1 = AltInst->getOperand(1);
566-
// Check if operands are compatible with alternate operands.
567-
if (AltPred == CurrentPred &&
568-
areCompatibleCmpOps(AltOp0, AltOp1, Op0, Op1))
569-
continue;
570-
if (AltPred == SwappedCurrentPred &&
571-
areCompatibleCmpOps(AltOp0, AltOp1, Op1, Op0))
572-
continue;
573-
}
574-
if (BaseIndex == AltIndex) {
575-
assert(isValidForAlternation(Opcode) &&
576-
isValidForAlternation(InstOpcode) &&
577-
"Cast isn't safe for alternation, logic needs to be updated!");
578-
AltIndex = Cnt;
579-
continue;
580-
}
581-
auto *AltInst = cast<CmpInst>(VL[AltIndex]);
582-
CmpInst::Predicate AltPred = AltInst->getPredicate();
583-
if (BasePred == CurrentPred || BasePred == SwappedCurrentPred ||
584-
AltPred == CurrentPred || AltPred == SwappedCurrentPred)
585-
continue;
586-
}
587517
} else if (InstOpcode == Opcode || InstOpcode == AltOpcode)
588518
continue;
589519
return InstructionsState(VL[BaseIndex], nullptr, nullptr);
@@ -4424,41 +4354,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
44244354
LLVM_DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
44254355

44264356
// Reorder operands if reordering would enable vectorization.
4427-
auto *CI = dyn_cast<CmpInst>(VL0);
4428-
if (isa<BinaryOperator>(VL0) || CI) {
4357+
if (isa<BinaryOperator>(VL0)) {
44294358
ValueList Left, Right;
4430-
if (!CI || all_of(VL, [](Value *V) {
4431-
return cast<CmpInst>(V)->isCommutative();
4432-
})) {
4433-
reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
4434-
} else {
4435-
CmpInst::Predicate P0 = CI->getPredicate();
4436-
CmpInst::Predicate AltP0 = cast<CmpInst>(S.AltOp)->getPredicate();
4437-
CmpInst::Predicate AltP0Swapped = CmpInst::getSwappedPredicate(AltP0);
4438-
Value *BaseOp0 = VL0->getOperand(0);
4439-
Value *BaseOp1 = VL0->getOperand(1);
4440-
// Collect operands - commute if it uses the swapped predicate or
4441-
// alternate operation.
4442-
for (Value *V : VL) {
4443-
auto *Cmp = cast<CmpInst>(V);
4444-
Value *LHS = Cmp->getOperand(0);
4445-
Value *RHS = Cmp->getOperand(1);
4446-
CmpInst::Predicate CurrentPred = CI->getPredicate();
4447-
CmpInst::Predicate CurrentPredSwapped =
4448-
CmpInst::getSwappedPredicate(CurrentPred);
4449-
if (P0 == AltP0 || P0 == AltP0Swapped) {
4450-
if ((P0 == CurrentPred &&
4451-
!areCompatibleCmpOps(BaseOp0, BaseOp1, LHS, RHS)) ||
4452-
(P0 == CurrentPredSwapped &&
4453-
!areCompatibleCmpOps(BaseOp0, BaseOp1, RHS, LHS)))
4454-
std::swap(LHS, RHS);
4455-
} else if (!areCompatibleCmpOps(BaseOp0, BaseOp1, LHS, RHS)) {
4456-
std::swap(LHS, RHS);
4457-
}
4458-
Left.push_back(LHS);
4459-
Right.push_back(RHS);
4460-
}
4461-
}
4359+
reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
44624360
TE->setOperand(0, Left);
44634361
TE->setOperand(1, Right);
44644362
buildTree_rec(Left, Depth + 1, {TE, 0});
@@ -5390,8 +5288,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
53905288
((Instruction::isBinaryOp(E->getOpcode()) &&
53915289
Instruction::isBinaryOp(E->getAltOpcode())) ||
53925290
(Instruction::isCast(E->getOpcode()) &&
5393-
Instruction::isCast(E->getAltOpcode())) ||
5394-
(isa<CmpInst>(VL0) && isa<CmpInst>(E->getAltOp()))) &&
5291+
Instruction::isCast(E->getAltOpcode()))) &&
53955292
"Invalid Shuffle Vector Operand");
53965293
InstructionCost ScalarCost = 0;
53975294
if (NeedToShuffleReuses) {
@@ -5439,14 +5336,6 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
54395336
VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind);
54405337
VecCost += TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy,
54415338
CostKind);
5442-
} else if (auto *CI0 = dyn_cast<CmpInst>(VL0)) {
5443-
VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy,
5444-
Builder.getInt1Ty(),
5445-
CI0->getPredicate(), CostKind, VL0);
5446-
VecCost += TTI->getCmpSelInstrCost(
5447-
E->getOpcode(), ScalarTy, Builder.getInt1Ty(),
5448-
cast<CmpInst>(E->getAltOp())->getPredicate(), CostKind,
5449-
E->getAltOp());
54505339
} else {
54515340
Type *Src0SclTy = E->getMainOp()->getOperand(0)->getType();
54525341
Type *Src1SclTy = E->getAltOp()->getOperand(0)->getType();
@@ -5463,29 +5352,6 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
54635352
E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices,
54645353
[E](Instruction *I) {
54655354
assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
5466-
if (auto *CI0 = dyn_cast<CmpInst>(E->getMainOp())) {
5467-
auto *AltCI0 = cast<CmpInst>(E->getAltOp());
5468-
auto *CI = cast<CmpInst>(I);
5469-
CmpInst::Predicate P0 = CI0->getPredicate();
5470-
CmpInst::Predicate AltP0 = AltCI0->getPredicate();
5471-
CmpInst::Predicate AltP0Swapped =
5472-
CmpInst::getSwappedPredicate(AltP0);
5473-
CmpInst::Predicate CurrentPred = CI->getPredicate();
5474-
CmpInst::Predicate CurrentPredSwapped =
5475-
CmpInst::getSwappedPredicate(CurrentPred);
5476-
if (P0 == AltP0 || P0 == AltP0Swapped) {
5477-
// Alternate cmps have same/swapped predicate as main cmps but
5478-
// different order of compatible operands.
5479-
return !(
5480-
(P0 == CurrentPred &&
5481-
areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1),
5482-
I->getOperand(0), I->getOperand(1))) ||
5483-
(P0 == CurrentPredSwapped &&
5484-
areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1),
5485-
I->getOperand(1), I->getOperand(0))));
5486-
}
5487-
return CurrentPred != P0 && CurrentPredSwapped != P0;
5488-
}
54895355
return I->getOpcode() == E->getAltOpcode();
54905356
},
54915357
Mask);
@@ -6968,12 +6834,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
69686834
((Instruction::isBinaryOp(E->getOpcode()) &&
69696835
Instruction::isBinaryOp(E->getAltOpcode())) ||
69706836
(Instruction::isCast(E->getOpcode()) &&
6971-
Instruction::isCast(E->getAltOpcode())) ||
6972-
(isa<CmpInst>(VL0) && isa<CmpInst>(E->getAltOp()))) &&
6837+
Instruction::isCast(E->getAltOpcode()))) &&
69736838
"Invalid Shuffle Vector Operand");
69746839

69756840
Value *LHS = nullptr, *RHS = nullptr;
6976-
if (Instruction::isBinaryOp(E->getOpcode()) || isa<CmpInst>(VL0)) {
6841+
if (Instruction::isBinaryOp(E->getOpcode())) {
69776842
setInsertPointAfterBundle(E);
69786843
LHS = vectorizeTree(E->getOperand(0));
69796844
RHS = vectorizeTree(E->getOperand(1));
@@ -6993,15 +6858,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
69936858
static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS, RHS);
69946859
V1 = Builder.CreateBinOp(
69956860
static_cast<Instruction::BinaryOps>(E->getAltOpcode()), LHS, RHS);
6996-
} else if (auto *CI0 = dyn_cast<CmpInst>(VL0)) {
6997-
V0 = Builder.CreateCmp(CI0->getPredicate(), LHS, RHS);
6998-
auto *AltCI = cast<CmpInst>(E->getAltOp());
6999-
CmpInst::Predicate AltPred = AltCI->getPredicate();
7000-
unsigned AltIdx =
7001-
std::distance(E->Scalars.begin(), find(E->Scalars, AltCI));
7002-
if (AltCI->getOperand(0) != E->getOperand(0)[AltIdx])
7003-
AltPred = CmpInst::getSwappedPredicate(AltPred);
7004-
V1 = Builder.CreateCmp(AltPred, LHS, RHS);
70056861
} else {
70066862
V0 = Builder.CreateCast(
70076863
static_cast<Instruction::CastOps>(E->getOpcode()), LHS, VecTy);
@@ -7026,29 +6882,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
70266882
E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices,
70276883
[E](Instruction *I) {
70286884
assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
7029-
if (auto *CI0 = dyn_cast<CmpInst>(E->getMainOp())) {
7030-
auto *AltCI0 = cast<CmpInst>(E->getAltOp());
7031-
auto *CI = cast<CmpInst>(I);
7032-
CmpInst::Predicate P0 = CI0->getPredicate();
7033-
CmpInst::Predicate AltP0 = AltCI0->getPredicate();
7034-
CmpInst::Predicate AltP0Swapped =
7035-
CmpInst::getSwappedPredicate(AltP0);
7036-
CmpInst::Predicate CurrentPred = CI->getPredicate();
7037-
CmpInst::Predicate CurrentPredSwapped =
7038-
CmpInst::getSwappedPredicate(CurrentPred);
7039-
if (P0 == AltP0 || P0 == AltP0Swapped) {
7040-
// Alternate cmps have same/swapped predicate as main cmps but
7041-
// different order of compatible operands.
7042-
return !(
7043-
(P0 == CurrentPred &&
7044-
areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1),
7045-
I->getOperand(0), I->getOperand(1))) ||
7046-
(P0 == CurrentPredSwapped &&
7047-
areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1),
7048-
I->getOperand(1), I->getOperand(0))));
7049-
}
7050-
return CurrentPred != P0 && CurrentPredSwapped != P0;
7051-
}
70526885
return I->getOpcode() == E->getAltOpcode();
70536886
},
70546887
Mask, &OpScalars, &AltScalars);

llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll

Lines changed: 35 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -90,17 +90,24 @@ return:
9090
define float @test_merge_anyof_v4sf(<4 x float> %t) {
9191
; CHECK-LABEL: @test_merge_anyof_v4sf(
9292
; CHECK-NEXT: entry:
93-
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[T:%.*]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
94-
; CHECK-NEXT: [[TMP0:%.*]] = fcmp olt <8 x float> [[SHUFFLE]], <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
95-
; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt <8 x float> [[SHUFFLE]], <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
96-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
97-
; CHECK-NEXT: [[TMP3:%.*]] = freeze <8 x i1> [[TMP2]]
98-
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i1> [[TMP3]] to i8
99-
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0
100-
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[T]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
101-
; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[SHIFT]], [[T]]
102-
; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x float> [[TMP5]], i64 0
103-
; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[DOTNOT]], float [[ADD]], float 0.000000e+00
93+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[T:%.*]], i64 3
94+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[T]], i64 2
95+
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[T]], i64 1
96+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[T]], i64 0
97+
; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x float> [[T]]
98+
; CHECK-NEXT: [[TMP4:%.*]] = fcmp olt <4 x float> [[T_FR]], zeroinitializer
99+
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4
100+
; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i4 [[TMP5]], 0
101+
; CHECK-NEXT: [[CMP19:%.*]] = fcmp ogt float [[TMP3]], 1.000000e+00
102+
; CHECK-NEXT: [[OR_COND3:%.*]] = select i1 [[TMP6]], i1 true, i1 [[CMP19]]
103+
; CHECK-NEXT: [[CMP24:%.*]] = fcmp ogt float [[TMP2]], 1.000000e+00
104+
; CHECK-NEXT: [[OR_COND4:%.*]] = select i1 [[OR_COND3]], i1 true, i1 [[CMP24]]
105+
; CHECK-NEXT: [[CMP29:%.*]] = fcmp ogt float [[TMP1]], 1.000000e+00
106+
; CHECK-NEXT: [[OR_COND5:%.*]] = select i1 [[OR_COND4]], i1 true, i1 [[CMP29]]
107+
; CHECK-NEXT: [[CMP34:%.*]] = fcmp ogt float [[TMP0]], 1.000000e+00
108+
; CHECK-NEXT: [[OR_COND6:%.*]] = select i1 [[OR_COND5]], i1 true, i1 [[CMP34]]
109+
; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP2]]
110+
; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND6]], float 0.000000e+00, float [[ADD]]
104111
; CHECK-NEXT: ret float [[RETVAL_0]]
105112
;
106113
entry:
@@ -413,18 +420,24 @@ return:
413420
define float @test_merge_anyof_v4si(<4 x i32> %t) {
414421
; CHECK-LABEL: @test_merge_anyof_v4si(
415422
; CHECK-NEXT: entry:
416-
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[T:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
417-
; CHECK-NEXT: [[TMP0:%.*]] = icmp slt <8 x i32> [[SHUFFLE]], <i32 1, i32 1, i32 1, i32 1, i32 255, i32 255, i32 255, i32 255>
418-
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <8 x i32> [[SHUFFLE]], <i32 1, i32 1, i32 1, i32 1, i32 255, i32 255, i32 255, i32 255>
419-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
420-
; CHECK-NEXT: [[TMP3:%.*]] = freeze <8 x i1> [[TMP2]]
421-
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i1> [[TMP3]] to i8
422-
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0
423-
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
424-
; CHECK-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[SHIFT]], [[T]]
425-
; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x i32> [[TMP5]], i64 0
423+
; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x i32> [[T:%.*]]
424+
; CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> [[T_FR]], <i32 1, i32 1, i32 1, i32 1>
425+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> [[TMP0]] to i4
426+
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i4 [[TMP1]], 0
427+
; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[T_FR]], <i32 255, i32 255, i32 255, i32 255>
428+
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i64 0
429+
; CHECK-NEXT: [[OR_COND3:%.*]] = or i1 [[TMP2]], [[TMP4]]
430+
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP3]], i64 1
431+
; CHECK-NEXT: [[OR_COND4:%.*]] = or i1 [[OR_COND3]], [[TMP5]]
432+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP3]], i64 2
433+
; CHECK-NEXT: [[OR_COND5:%.*]] = or i1 [[OR_COND4]], [[TMP6]]
434+
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP3]], i64 3
435+
; CHECK-NEXT: [[OR_COND6:%.*]] = or i1 [[OR_COND5]], [[TMP7]]
436+
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T_FR]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
437+
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[SHIFT]], [[T_FR]]
438+
; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x i32> [[TMP8]], i64 0
426439
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[ADD]] to float
427-
; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[DOTNOT]], float [[CONV]], float 0.000000e+00
440+
; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND6]], float 0.000000e+00, float [[CONV]]
428441
; CHECK-NEXT: ret float [[RETVAL_0]]
429442
;
430443
entry:

0 commit comments

Comments
 (0)