@@ -19087,69 +19087,82 @@ static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
1908719087/// functions, this can help to reduce the number of fmovs to/from GPRs.
1908819088static SDValue
1908919089tryToReplaceScalarFPConversionWithSVE(SDNode *N, SelectionDAG &DAG,
19090+ TargetLowering::DAGCombinerInfo &DCI,
1909019091 const AArch64Subtarget *Subtarget) {
1909119092 if (N->isStrictFPOpcode())
1909219093 return SDValue();
1909319094
19095+ if (DCI.isBeforeLegalizeOps())
19096+ return SDValue();
19097+
1909419098 if (!Subtarget->isSVEorStreamingSVEAvailable() ||
1909519099 (!Subtarget->isStreaming() && !Subtarget->isStreamingCompatible()))
1909619100 return SDValue();
1909719101
1909819102 auto isSupportedType = [](EVT VT) {
19099- if (!VT.isSimple())
19100- return false;
19101- // There are SVE instructions that can convert to/from all pairs of these
19102- // int and float types. Note: We don't bother with i8 or i16 as those are
19103- // illegal types for scalars.
19104- return is_contained({MVT::i32, MVT::i64, MVT::f16, MVT::f32, MVT::f64},
19105- VT.getSimpleVT().SimpleTy);
19103+ return VT != MVT::bf16 && VT != MVT::f128;
1910619104 };
1910719105
1910819106 if (!isSupportedType(N->getValueType(0)) ||
1910919107 !isSupportedType(N->getOperand(0).getValueType()))
1911019108 return SDValue();
1911119109
19110+ // Look through fp_extends to avoid extra fcvts.
1911219111 SDValue SrcVal = N->getOperand(0);
19112+ if (SrcVal->getOpcode() == ISD::FP_EXTEND &&
19113+ isSupportedType(SrcVal->getOperand(0).getValueType()))
19114+ SrcVal = SrcVal->getOperand(0);
19115+
1911319116 EVT SrcTy = SrcVal.getValueType();
1911419117 EVT DestTy = N->getValueType(0);
1911519118
19116- bool IsI32ToF64 = SrcTy == MVT::i32 && DestTy == MVT::f64;
19117- bool isF64ToI32 = SrcTy == MVT::f64 && DestTy == MVT::i32;
19118-
19119- // Conversions between f64 and i32 are a special case as nxv2i32 is an illegal
19120- // type (unlike the equivalent nxv2f32 for floating-point types).
19121- // TODO: Support these conversations.
19122- if (IsI32ToF64 || isF64ToI32)
19123- return SDValue();
19119+ // Merge in any subsequent fp_round to avoid extra fcvts.
19120+ SDNode *FPRoundNode = nullptr;
19121+ if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::FP_ROUND &&
19122+ isSupportedType(N->use_begin()->getValueType(0))) {
19123+ FPRoundNode = *N->use_begin();
19124+ DestTy = FPRoundNode->getValueType(0);
19125+ }
1912419126
1912519127 EVT SrcVecTy;
1912619128 EVT DestVecTy;
1912719129 if (DestTy.bitsGT(SrcTy)) {
1912819130 DestVecTy = getPackedSVEVectorVT(DestTy);
19129- SrcVecTy = SrcTy == MVT::i32 ? getPackedSVEVectorVT(SrcTy)
19130- : DestVecTy.changeVectorElementType(SrcTy);
19131+ SrcVecTy = DestVecTy.changeVectorElementType(SrcTy);
1913119132 } else {
1913219133 SrcVecTy = getPackedSVEVectorVT(SrcTy);
19133- DestVecTy = DestTy == MVT::i32 ? getPackedSVEVectorVT(DestTy)
19134- : SrcVecTy.changeVectorElementType(DestTy);
19134+ DestVecTy = SrcVecTy.changeVectorElementType(DestTy);
1913519135 }
1913619136
19137+ // Ensure the resulting src/dest vector type is legal.
19138+ if (SrcVecTy == MVT::nxv2i32 || DestVecTy == MVT::nxv2i32)
19139+ return SDValue();
19140+
1913719141 SDLoc DL(N);
1913819142 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
1913919143 SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, SrcVecTy,
1914019144 DAG.getUNDEF(SrcVecTy), SrcVal, ZeroIdx);
1914119145 SDValue Convert = DAG.getNode(N->getOpcode(), DL, DestVecTy, Vec);
19142- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, DestTy, Convert, ZeroIdx);
19146+ SDValue Scalar =
19147+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, DestTy, Convert, ZeroIdx);
19148+
19149+ if (FPRoundNode) {
19150+ DAG.ReplaceAllUsesWith(SDValue(FPRoundNode, 0), Scalar);
19151+ return SDValue();
19152+ }
19153+ return Scalar;
1914319154}
1914419155
1914519156static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
19157+ TargetLowering::DAGCombinerInfo &DCI,
1914619158 const AArch64Subtarget *Subtarget) {
1914719159 // First try to optimize away the conversion when it's conditionally from
1914819160 // a constant. Vectors only.
1914919161 if (SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG))
1915019162 return Res;
1915119163
19152- if (SDValue Res = tryToReplaceScalarFPConversionWithSVE(N, DAG, Subtarget))
19164+ if (SDValue Res =
19165+ tryToReplaceScalarFPConversionWithSVE(N, DAG, DCI, Subtarget))
1915319166 return Res;
1915419167
1915519168 EVT VT = N->getValueType(0);
@@ -19190,7 +19203,8 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
1919019203static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
1919119204 TargetLowering::DAGCombinerInfo &DCI,
1919219205 const AArch64Subtarget *Subtarget) {
19193- if (SDValue Res = tryToReplaceScalarFPConversionWithSVE(N, DAG, Subtarget))
19206+ if (SDValue Res =
19207+ tryToReplaceScalarFPConversionWithSVE(N, DAG, DCI, Subtarget))
1919419208 return Res;
1919519209
1919619210 if (!Subtarget->isNeonAvailable())
@@ -26273,7 +26287,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
2627326287 return performMulCombine(N, DAG, DCI, Subtarget);
2627426288 case ISD::SINT_TO_FP:
2627526289 case ISD::UINT_TO_FP:
26276- return performIntToFpCombine(N, DAG, Subtarget);
26290+ return performIntToFpCombine(N, DAG, DCI, Subtarget);
2627726291 case ISD::FP_TO_SINT:
2627826292 case ISD::FP_TO_UINT:
2627926293 case ISD::FP_TO_SINT_SAT:
0 commit comments