@@ -2811,6 +2811,17 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
2811
2811
BF16Tbl, ISD, DstTy.getSimpleVT (), SrcTy.getSimpleVT ()))
2812
2812
return AdjustCost (Entry->Cost );
2813
2813
2814
+ // Symbolic constants for the SVE sitofp/uitofp entries in the table below.
2815
+ // The cost of unpacking twice is artificially increased for now in order
2816
+ // to avoid regressions against NEON, which will use tbl instructions directly
2817
+ // instead of multiple layers of [s|u]unpk[lo|hi].
2818
+ // We use the unpacks in cases where the destination type is illegal and
2819
+ // requires splitting of the input, even if the input type itself is legal.
2820
+ const unsigned int SVE_EXT_COST = 1 ;
2821
+ const unsigned int SVE_FCVT_COST = 1 ;
2822
+ const unsigned int SVE_UNPACK_ONCE = 4 ;
2823
+ const unsigned int SVE_UNPACK_TWICE = 16 ;
2824
+
2814
2825
static const TypeConversionCostTblEntry ConversionTbl[] = {
2815
2826
{ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, 1 }, // xtn
2816
2827
{ISD::TRUNCATE, MVT::v2i16, MVT::v2i64, 1 }, // xtn
@@ -2936,6 +2947,42 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
2936
2947
{ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
2937
2948
{ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
2938
2949
2950
+ // SVE: to nxv2f16
2951
+ {ISD::SINT_TO_FP, MVT::nxv2f16, MVT::nxv2i8,
2952
+ SVE_EXT_COST + SVE_FCVT_COST},
2953
+ {ISD::SINT_TO_FP, MVT::nxv2f16, MVT::nxv2i16, SVE_FCVT_COST},
2954
+ {ISD::SINT_TO_FP, MVT::nxv2f16, MVT::nxv2i32, SVE_FCVT_COST},
2955
+ {ISD::SINT_TO_FP, MVT::nxv2f16, MVT::nxv2i64, SVE_FCVT_COST},
2956
+ {ISD::UINT_TO_FP, MVT::nxv2f16, MVT::nxv2i8,
2957
+ SVE_EXT_COST + SVE_FCVT_COST},
2958
+ {ISD::UINT_TO_FP, MVT::nxv2f16, MVT::nxv2i16, SVE_FCVT_COST},
2959
+ {ISD::UINT_TO_FP, MVT::nxv2f16, MVT::nxv2i32, SVE_FCVT_COST},
2960
+ {ISD::UINT_TO_FP, MVT::nxv2f16, MVT::nxv2i64, SVE_FCVT_COST},
2961
+
2962
+ // SVE: to nxv4f16
2963
+ {ISD::SINT_TO_FP, MVT::nxv4f16, MVT::nxv4i8,
2964
+ SVE_EXT_COST + SVE_FCVT_COST},
2965
+ {ISD::SINT_TO_FP, MVT::nxv4f16, MVT::nxv4i16, SVE_FCVT_COST},
2966
+ {ISD::SINT_TO_FP, MVT::nxv4f16, MVT::nxv4i32, SVE_FCVT_COST},
2967
+ {ISD::UINT_TO_FP, MVT::nxv4f16, MVT::nxv4i8,
2968
+ SVE_EXT_COST + SVE_FCVT_COST},
2969
+ {ISD::UINT_TO_FP, MVT::nxv4f16, MVT::nxv4i16, SVE_FCVT_COST},
2970
+ {ISD::UINT_TO_FP, MVT::nxv4f16, MVT::nxv4i32, SVE_FCVT_COST},
2971
+
2972
+ // SVE: to nxv8f16
2973
+ {ISD::SINT_TO_FP, MVT::nxv8f16, MVT::nxv8i8,
2974
+ SVE_EXT_COST + SVE_FCVT_COST},
2975
+ {ISD::SINT_TO_FP, MVT::nxv8f16, MVT::nxv8i16, SVE_FCVT_COST},
2976
+ {ISD::UINT_TO_FP, MVT::nxv8f16, MVT::nxv8i8,
2977
+ SVE_EXT_COST + SVE_FCVT_COST},
2978
+ {ISD::UINT_TO_FP, MVT::nxv8f16, MVT::nxv8i16, SVE_FCVT_COST},
2979
+
2980
+ // SVE: to nxv16f16
2981
+ {ISD::SINT_TO_FP, MVT::nxv16f16, MVT::nxv16i8,
2982
+ SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
2983
+ {ISD::UINT_TO_FP, MVT::nxv16f16, MVT::nxv16i8,
2984
+ SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
2985
+
2939
2986
// Complex: to v2f32
2940
2987
{ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
2941
2988
{ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
@@ -2944,18 +2991,56 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
2944
2991
{ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
2945
2992
{ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
2946
2993
2994
+ // SVE: to nxv2f32
2995
+ {ISD::SINT_TO_FP, MVT::nxv2f32, MVT::nxv2i8,
2996
+ SVE_EXT_COST + SVE_FCVT_COST},
2997
+ {ISD::SINT_TO_FP, MVT::nxv2f32, MVT::nxv2i16, SVE_FCVT_COST},
2998
+ {ISD::SINT_TO_FP, MVT::nxv2f32, MVT::nxv2i32, SVE_FCVT_COST},
2999
+ {ISD::SINT_TO_FP, MVT::nxv2f32, MVT::nxv2i64, SVE_FCVT_COST},
3000
+ {ISD::UINT_TO_FP, MVT::nxv2f32, MVT::nxv2i8,
3001
+ SVE_EXT_COST + SVE_FCVT_COST},
3002
+ {ISD::UINT_TO_FP, MVT::nxv2f32, MVT::nxv2i16, SVE_FCVT_COST},
3003
+ {ISD::UINT_TO_FP, MVT::nxv2f32, MVT::nxv2i32, SVE_FCVT_COST},
3004
+ {ISD::UINT_TO_FP, MVT::nxv2f32, MVT::nxv2i64, SVE_FCVT_COST},
3005
+
2947
3006
// Complex: to v4f32
2948
3007
{ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 4 },
2949
3008
{ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
2950
3009
{ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
2951
3010
{ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
2952
3011
3012
+ // SVE: to nxv4f32
3013
+ {ISD::SINT_TO_FP, MVT::nxv4f32, MVT::nxv4i8,
3014
+ SVE_EXT_COST + SVE_FCVT_COST},
3015
+ {ISD::SINT_TO_FP, MVT::nxv4f32, MVT::nxv4i16, SVE_FCVT_COST},
3016
+ {ISD::SINT_TO_FP, MVT::nxv4f32, MVT::nxv4i32, SVE_FCVT_COST},
3017
+ {ISD::UINT_TO_FP, MVT::nxv4f32, MVT::nxv4i8,
3018
+ SVE_EXT_COST + SVE_FCVT_COST},
3019
+ {ISD::UINT_TO_FP, MVT::nxv4f32, MVT::nxv4i16, SVE_FCVT_COST},
3020
+ {ISD::UINT_TO_FP, MVT::nxv4f32, MVT::nxv4i32, SVE_FCVT_COST},
3021
+
2953
3022
// Complex: to v8f32
2954
3023
{ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
2955
3024
{ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
2956
3025
{ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
2957
3026
{ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
2958
3027
3028
+ // SVE: to nxv8f32
3029
+ {ISD::SINT_TO_FP, MVT::nxv8f32, MVT::nxv8i8,
3030
+ SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3031
+ {ISD::SINT_TO_FP, MVT::nxv8f32, MVT::nxv8i16,
3032
+ SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3033
+ {ISD::UINT_TO_FP, MVT::nxv8f32, MVT::nxv8i8,
3034
+ SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3035
+ {ISD::UINT_TO_FP, MVT::nxv8f32, MVT::nxv8i16,
3036
+ SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3037
+
3038
+ // SVE: to nxv16f32
3039
+ {ISD::SINT_TO_FP, MVT::nxv16f32, MVT::nxv16i8,
3040
+ SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3041
+ {ISD::UINT_TO_FP, MVT::nxv16f32, MVT::nxv16i8,
3042
+ SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3043
+
2959
3044
// Complex: to v16f32
2960
3045
{ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
2961
3046
{ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
@@ -2968,10 +3053,46 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
2968
3053
{ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
2969
3054
{ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
2970
3055
3056
+ // SVE: to nxv2f64
3057
+ {ISD::SINT_TO_FP, MVT::nxv2f64, MVT::nxv2i8,
3058
+ SVE_EXT_COST + SVE_FCVT_COST},
3059
+ {ISD::SINT_TO_FP, MVT::nxv2f64, MVT::nxv2i16, SVE_FCVT_COST},
3060
+ {ISD::SINT_TO_FP, MVT::nxv2f64, MVT::nxv2i32, SVE_FCVT_COST},
3061
+ {ISD::SINT_TO_FP, MVT::nxv2f64, MVT::nxv2i64, SVE_FCVT_COST},
3062
+ {ISD::UINT_TO_FP, MVT::nxv2f64, MVT::nxv2i8,
3063
+ SVE_EXT_COST + SVE_FCVT_COST},
3064
+ {ISD::UINT_TO_FP, MVT::nxv2f64, MVT::nxv2i16, SVE_FCVT_COST},
3065
+ {ISD::UINT_TO_FP, MVT::nxv2f64, MVT::nxv2i32, SVE_FCVT_COST},
3066
+ {ISD::UINT_TO_FP, MVT::nxv2f64, MVT::nxv2i64, SVE_FCVT_COST},
3067
+
2971
3068
// Complex: to v4f64
2972
3069
{ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 4 },
2973
3070
{ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 4 },
2974
3071
3072
+ // SVE: to nxv4f64
3073
+ {ISD::SINT_TO_FP, MVT::nxv4f64, MVT::nxv4i8,
3074
+ SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3075
+ {ISD::SINT_TO_FP, MVT::nxv4f64, MVT::nxv4i16,
3076
+ SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3077
+ {ISD::SINT_TO_FP, MVT::nxv4f64, MVT::nxv4i32,
3078
+ SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3079
+ {ISD::UINT_TO_FP, MVT::nxv4f64, MVT::nxv4i8,
3080
+ SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3081
+ {ISD::UINT_TO_FP, MVT::nxv4f64, MVT::nxv4i16,
3082
+ SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3083
+ {ISD::UINT_TO_FP, MVT::nxv4f64, MVT::nxv4i32,
3084
+ SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3085
+
3086
+ // SVE: to nxv8f64
3087
+ {ISD::SINT_TO_FP, MVT::nxv8f64, MVT::nxv8i8,
3088
+ SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3089
+ {ISD::SINT_TO_FP, MVT::nxv8f64, MVT::nxv8i16,
3090
+ SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3091
+ {ISD::UINT_TO_FP, MVT::nxv8f64, MVT::nxv8i8,
3092
+ SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3093
+ {ISD::UINT_TO_FP, MVT::nxv8f64, MVT::nxv8i16,
3094
+ SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3095
+
2975
3096
// LowerVectorFP_TO_INT
2976
3097
{ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1 },
2977
3098
{ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
0 commit comments