@@ -8384,6 +8384,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
8384
8384
(void)E;
8385
8385
return TTI->getInstructionCost(VI, CostKind);
8386
8386
};
8387
+ // FIXME: Workaround for syntax error reported by MSVC buildbots.
8388
+ TargetTransformInfo &TTIRef = *TTI;
8387
8389
// Need to clear CommonCost since the final shuffle cost is included into
8388
8390
// vector cost.
8389
8391
auto GetVectorCost = [&](InstructionCost) {
@@ -8398,14 +8400,15 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
8398
8400
// No need to add new vector costs here since we're going to reuse
8399
8401
// same main/alternate vector ops, just do different shuffling.
8400
8402
} else if (Instruction::isBinaryOp(E->getOpcode())) {
8401
- VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind);
8403
+ VecCost =
8404
+ TTIRef.getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind);
8402
8405
VecCost +=
8403
- TTI-> getArithmeticInstrCost(E->getAltOpcode(), VecTy, CostKind);
8406
+ TTIRef. getArithmeticInstrCost(E->getAltOpcode(), VecTy, CostKind);
8404
8407
} else if (auto *CI0 = dyn_cast<CmpInst>(VL0)) {
8405
8408
auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), VL.size());
8406
- VecCost = TTI-> getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy,
8407
- CI0->getPredicate(), CostKind, VL0);
8408
- VecCost += TTI-> getCmpSelInstrCost(
8409
+ VecCost = TTIRef. getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy,
8410
+ CI0->getPredicate(), CostKind, VL0);
8411
+ VecCost += TTIRef. getCmpSelInstrCost(
8409
8412
E->getOpcode(), VecTy, MaskTy,
8410
8413
cast<CmpInst>(E->getAltOp())->getPredicate(), CostKind,
8411
8414
E->getAltOp());
@@ -8414,10 +8417,11 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
8414
8417
Type *Src1SclTy = E->getAltOp()->getOperand(0)->getType();
8415
8418
auto *Src0Ty = FixedVectorType::get(Src0SclTy, VL.size());
8416
8419
auto *Src1Ty = FixedVectorType::get(Src1SclTy, VL.size());
8417
- VecCost = TTI->getCastInstrCost(E->getOpcode(), VecTy, Src0Ty,
8418
- TTI::CastContextHint::None, CostKind);
8419
- VecCost += TTI->getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty,
8420
- TTI::CastContextHint::None, CostKind);
8420
+ VecCost = TTIRef.getCastInstrCost(E->getOpcode(), VecTy, Src0Ty,
8421
+ TTI::CastContextHint::None, CostKind);
8422
+ VecCost +=
8423
+ TTIRef.getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty,
8424
+ TTI::CastContextHint::None, CostKind);
8421
8425
}
8422
8426
SmallVector<int> Mask;
8423
8427
E->buildAltOpShuffleMask(
@@ -8426,8 +8430,27 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
8426
8430
return I->getOpcode() == E->getAltOpcode();
8427
8431
},
8428
8432
Mask);
8429
- VecCost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
8430
- FinalVecTy, Mask);
8433
+ VecCost += TTIRef.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
8434
+ FinalVecTy, Mask);
8435
+ // Patterns like [fadd,fsub] can be combined into a single instruction
8436
+ // on x86, whereas the reordered form [fsub,fadd] blocks this pattern.
8437
+ // So we need to take the per-lane order of the two opcodes into
8438
+ // account when computing the vector cost.
8439
+ unsigned Opcode0 = E->getOpcode();
8440
+ unsigned Opcode1 = E->getAltOpcode();
8441
+ // The opcode mask selects between the two opcodes.
8442
+ SmallBitVector OpcodeMask(E->Scalars.size(), false);
8443
+ for (unsigned Lane : seq<unsigned>(0, E->Scalars.size()))
8444
+ if (cast<Instruction>(E->Scalars[Lane])->getOpcode() == Opcode1)
8445
+ OpcodeMask.set(Lane);
8446
+ // If this pattern is supported by the target, then use the cheaper of
8447
+ // its alternate-instruction cost and the shuffle-based cost so far.
8448
+ if (TTIRef.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask)) {
8449
+ InstructionCost AltVecCost = TTIRef.getAltInstrCost(
8450
+ VecTy, Opcode0, Opcode1, OpcodeMask, CostKind);
8451
+ return AltVecCost < VecCost ? AltVecCost : VecCost;
8452
+ }
8453
+ // TODO: Check the reverse order too.
8431
8454
return VecCost;
8432
8455
};
8433
8456
return GetCostDiff(GetScalarCost, GetVectorCost);
0 commit comments