|
25 | 25 | #include "llvm/Analysis/OptimizationRemarkEmitter.h"
|
26 | 26 | #include "llvm/Analysis/TargetTransformInfo.h"
|
27 | 27 | #include "llvm/Analysis/TargetTransformInfoImpl.h"
|
| 28 | +#include "llvm/Analysis/ValueTracking.h" |
28 | 29 | #include "llvm/CodeGen/ISDOpcodes.h"
|
29 | 30 | #include "llvm/CodeGen/TargetLowering.h"
|
30 | 31 | #include "llvm/CodeGen/TargetSubtargetInfo.h"
|
@@ -1758,6 +1759,53 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
|
1758 | 1759 | CmpInst::ICMP_ULT, CostKind);
|
1759 | 1760 | return Cost;
|
1760 | 1761 | }
|
| 1762 | + case Intrinsic::experimental_cttz_elts: { |
| 1763 | + EVT ArgType = getTLI()->getValueType(DL, ICA.getArgTypes()[0], true); |
| 1764 | + |
| 1765 | + // If we're not expanding the intrinsic then we assume this is cheap |
| 1766 | + // to implement. |
| 1767 | + if (!getTLI()->shouldExpandCttzElements(ArgType)) |
| 1768 | + return getTypeLegalizationCost(RetTy).first; |
| 1769 | + |
| 1770 | + // TODO: The costs below reflect the expansion code in |
| 1771 | + // SelectionDAGBuilder, but we may want to sacrifice some accuracy in |
| 1772 | + // favour of compile time. |
| 1773 | + |
| 1774 | + // Find the smallest "sensible" element type to use for the expansion. |
| 1775 | + bool ZeroIsPoison = !cast<ConstantInt>(Args[1])->isZero(); |
| 1776 | + ConstantRange VScaleRange(APInt(64, 1), APInt::getZero(64)); |
| 1777 | + if (isa<ScalableVectorType>(ICA.getArgTypes()[0]) && I && I->getCaller()) |
| 1778 | + VScaleRange = getVScaleRange(I->getCaller(), 64); |
| 1779 | + |
| 1780 | + unsigned EltWidth = getTLI()->getBitWidthForCttzElements( |
| 1781 | + RetTy, ArgType.getVectorElementCount(), ZeroIsPoison, &VScaleRange); |
| 1782 | + Type *NewEltTy = IntegerType::getIntNTy(RetTy->getContext(), EltWidth); |
| 1783 | + |
| 1784 | + // Create the new vector type, keeping the element count of Args[0]. |
| 1785 | + Type *NewVecTy = VectorType::get( |
| 1786 | + NewEltTy, cast<VectorType>(Args[0]->getType())->getElementCount()); |
| 1787 | + |
| 1788 | + IntrinsicCostAttributes StepVecAttrs(Intrinsic::experimental_stepvector, |
| 1789 | + NewVecTy, {}, FMF); |
| 1790 | + InstructionCost Cost = |
| 1791 | + thisT()->getIntrinsicInstrCost(StepVecAttrs, CostKind); |
| 1792 | + |
| 1793 | + Cost += |
| 1794 | + thisT()->getArithmeticInstrCost(Instruction::Sub, NewVecTy, CostKind); |
| 1795 | + Cost += thisT()->getCastInstrCost(Instruction::SExt, NewVecTy, |
| 1796 | + Args[0]->getType(), |
| 1797 | + TTI::CastContextHint::None, CostKind); |
| 1798 | + Cost += |
| 1799 | + thisT()->getArithmeticInstrCost(Instruction::And, NewVecTy, CostKind); |
| 1800 | + |
| 1801 | + IntrinsicCostAttributes ReducAttrs(Intrinsic::vector_reduce_umax, |
| 1802 | + NewEltTy, NewVecTy, FMF, I, 1); |
| 1803 | + Cost += thisT()->getTypeBasedIntrinsicInstrCost(ReducAttrs, CostKind); |
| 1804 | + Cost += |
| 1805 | + thisT()->getArithmeticInstrCost(Instruction::Sub, NewEltTy, CostKind); |
| 1806 | + |
| 1807 | + return Cost; |
| 1808 | + } |
1761 | 1809 | }
|
1762 | 1810 |
|
1763 | 1811 | // VP Intrinsics should have the same cost as their non-vp counterpart.
|
|
0 commit comments