@@ -461,6 +461,11 @@ Value *VPInstruction::generate(VPTransformState &State) {
461
461
Value *A = State.get (getOperand (0 ));
462
462
return Builder.CreateNot (A, Name);
463
463
}
464
+ case Instruction::ExtractElement: {
465
+ Value *Vec = State.get (getOperand (0 ));
466
+ Value *Idx = State.get (getOperand (1 ), true );
467
+ return Builder.CreateExtractElement (Vec, Idx, Name);
468
+ }
464
469
case Instruction::ICmp: {
465
470
bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed (this );
466
471
Value *A = State.get (getOperand (0 ), OnlyFirstLaneUsed);
@@ -705,12 +710,10 @@ Value *VPInstruction::generate(VPTransformState &State) {
705
710
Value *A = State.get (getOperand (0 ));
706
711
return Builder.CreateOrReduce (A);
707
712
}
708
- case VPInstruction::ExtractFirstActive: {
709
- Value *Vec = State.get (getOperand (0 ));
710
- Value *Mask = State.get (getOperand (1 ));
711
- Value *Ctz = Builder.CreateCountTrailingZeroElems (
712
- Builder.getInt64Ty (), Mask, true , " first.active.lane" );
713
- return Builder.CreateExtractElement (Vec, Ctz, " early.exit.value" );
713
+ case VPInstruction::FirstActiveLane: {
714
+ Value *Mask = State.get (getOperand (0 ));
715
+ return Builder.CreateCountTrailingZeroElems (Builder.getInt64Ty (), Mask,
716
+ true , Name);
714
717
}
715
718
default :
716
719
llvm_unreachable (" Unsupported opcode for instruction" );
@@ -737,22 +740,24 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
737
740
}
738
741
739
742
switch (getOpcode ()) {
743
+ case Instruction::ExtractElement: {
744
+ // Add on the cost of extracting the element.
745
+ auto *VecTy = toVectorTy (Ctx.Types .inferScalarType (getOperand (0 )), VF);
746
+ return Ctx.TTI .getVectorInstrCost (Instruction::ExtractElement, VecTy,
747
+ Ctx.CostKind );
748
+ }
740
749
case VPInstruction::AnyOf: {
741
750
auto *VecTy = toVectorTy (Ctx.Types .inferScalarType (this ), VF);
742
751
return Ctx.TTI .getArithmeticReductionCost (
743
752
Instruction::Or, cast<VectorType>(VecTy), std::nullopt, Ctx.CostKind );
744
753
}
745
- case VPInstruction::ExtractFirstActive : {
754
+ case VPInstruction::FirstActiveLane : {
746
755
// Calculate the cost of determining the lane index.
747
- auto *PredTy = toVectorTy (Ctx.Types .inferScalarType (getOperand (1 )), VF);
756
+ auto *PredTy = toVectorTy (Ctx.Types .inferScalarType (getOperand (0 )), VF);
748
757
IntrinsicCostAttributes Attrs (Intrinsic::experimental_cttz_elts,
749
758
Type::getInt64Ty (Ctx.LLVMCtx ),
750
759
{PredTy, Type::getInt1Ty (Ctx.LLVMCtx )});
751
- InstructionCost Cost = Ctx.TTI .getIntrinsicInstrCost (Attrs, Ctx.CostKind );
752
- // Add on the cost of extracting the element.
753
- auto *VecTy = toVectorTy (Ctx.Types .inferScalarType (getOperand (0 )), VF);
754
- return Cost + Ctx.TTI .getVectorInstrCost (Instruction::ExtractElement, VecTy,
755
- Ctx.CostKind );
760
+ return Ctx.TTI .getIntrinsicInstrCost (Attrs, Ctx.CostKind );
756
761
}
757
762
default :
758
763
// TODO: Compute cost other VPInstructions once the legacy cost model has
@@ -765,7 +770,8 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
765
770
766
771
bool VPInstruction::isVectorToScalar () const {
767
772
return getOpcode () == VPInstruction::ExtractFromEnd ||
768
- getOpcode () == VPInstruction::ExtractFirstActive ||
773
+ getOpcode () == Instruction::ExtractElement ||
774
+ getOpcode () == VPInstruction::FirstActiveLane ||
769
775
getOpcode () == VPInstruction::ComputeReductionResult ||
770
776
getOpcode () == VPInstruction::AnyOf;
771
777
}
@@ -824,13 +830,14 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
824
830
if (Instruction::isBinaryOp (getOpcode ()))
825
831
return false ;
826
832
switch (getOpcode ()) {
833
+ case Instruction::ExtractElement:
827
834
case Instruction::ICmp:
828
835
case Instruction::Select:
829
836
case VPInstruction::AnyOf:
830
837
case VPInstruction::CalculateTripCountMinusVF:
831
838
case VPInstruction::CanonicalIVIncrementForPart:
832
839
case VPInstruction::ExtractFromEnd:
833
- case VPInstruction::ExtractFirstActive :
840
+ case VPInstruction::FirstActiveLane :
834
841
case VPInstruction::FirstOrderRecurrenceSplice:
835
842
case VPInstruction::LogicalAnd:
836
843
case VPInstruction::Not:
@@ -939,7 +946,6 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
939
946
case VPInstruction::Broadcast:
940
947
O << " broadcast" ;
941
948
break ;
942
-
943
949
case VPInstruction::ExtractFromEnd:
944
950
O << " extract-from-end" ;
945
951
break ;
@@ -955,8 +961,8 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
955
961
case VPInstruction::AnyOf:
956
962
O << " any-of" ;
957
963
break ;
958
- case VPInstruction::ExtractFirstActive :
959
- O << " extract- first-active" ;
964
+ case VPInstruction::FirstActiveLane :
965
+ O << " first-active-lane " ;
960
966
break ;
961
967
default :
962
968
O << Instruction::getOpcodeName (getOpcode ());
0 commit comments