Skip to content

Commit e44e24d

Browse files
authored
[AArch64] Improve vector funnel shift by constant costs. (#130044)
We now have better codegen, and can have better costs to match. The generated code should now produce a shl+usra and can be seen in testcases such as: https://github.com/llvm/llvm-project/blob/7e5821bae80db3f3f0fe0d5f8ce62f79e548eed5/llvm/test/CodeGen/AArch64/fsh.ll#L3941.
1 parent 62a6d63 commit e44e24d

File tree

3 files changed

+20
-21
lines changed

3 files changed

+20
-21
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

+4-5
Original file line numberDiff line numberDiff line change
@@ -884,12 +884,11 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
884884

885885
const auto LegalisationCost = getTypeLegalizationCost(RetTy);
886886
if (OpInfoZ.isUniform()) {
887-
// FIXME: The costs could be lower if the codegen is better.
888887
static const CostTblEntry FshlTbl[] = {
889-
{Intrinsic::fshl, MVT::v4i32, 3}, // ushr + shl + orr
890-
{Intrinsic::fshl, MVT::v2i64, 3}, {Intrinsic::fshl, MVT::v16i8, 4},
891-
{Intrinsic::fshl, MVT::v8i16, 4}, {Intrinsic::fshl, MVT::v2i32, 3},
892-
{Intrinsic::fshl, MVT::v8i8, 4}, {Intrinsic::fshl, MVT::v4i16, 4}};
888+
{Intrinsic::fshl, MVT::v4i32, 2}, // shl + usra
889+
{Intrinsic::fshl, MVT::v2i64, 2}, {Intrinsic::fshl, MVT::v16i8, 2},
890+
{Intrinsic::fshl, MVT::v8i16, 2}, {Intrinsic::fshl, MVT::v2i32, 2},
891+
{Intrinsic::fshl, MVT::v8i8, 2}, {Intrinsic::fshl, MVT::v4i16, 2}};
893892
// Costs for both fshl & fshr are the same, so just pass Intrinsic::fshl
894893
// to avoid having to duplicate the costs.
895894
const auto *Entry =

llvm/test/Analysis/CostModel/AArch64/fshl.ll

+8-8
Original file line numberDiff line numberDiff line change
@@ -129,11 +129,11 @@ declare i19 @llvm.fshl.i19(i19, i19, i19)
129129

130130
define <16 x i8> @fshl_v16i8_3rd_arg_vec_const_all_lanes_same(<16 x i8> %a, <16 x i8> %b) {
131131
; RECIP-LABEL: 'fshl_v16i8_3rd_arg_vec_const_all_lanes_same'
132-
; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3))
132+
; RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3))
133133
; RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %fshl
134134
;
135135
; SIZE-LABEL: 'fshl_v16i8_3rd_arg_vec_const_all_lanes_same'
136-
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3))
136+
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3))
137137
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %fshl
138138
;
139139
entry:
@@ -173,11 +173,11 @@ declare <16 x i8> @llvm.fshl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
173173

174174
define <8 x i16> @fshl_v8i16_3rd_arg_vec_const_all_lanes_same(<8 x i16> %a, <8 x i16> %b) {
175175
; RECIP-LABEL: 'fshl_v8i16_3rd_arg_vec_const_all_lanes_same'
176-
; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3))
176+
; RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3))
177177
; RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %fshl
178178
;
179179
; SIZE-LABEL: 'fshl_v8i16_3rd_arg_vec_const_all_lanes_same'
180-
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3))
180+
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3))
181181
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %fshl
182182
;
183183
entry:
@@ -217,11 +217,11 @@ declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
217217

218218
define <4 x i32> @fshl_v4i32_3rd_arg_vec_const_all_lanes_same(<4 x i32> %a, <4 x i32> %b) {
219219
; RECIP-LABEL: 'fshl_v4i32_3rd_arg_vec_const_all_lanes_same'
220-
; RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3))
220+
; RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3))
221221
; RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %fshl
222222
;
223223
; SIZE-LABEL: 'fshl_v4i32_3rd_arg_vec_const_all_lanes_same'
224-
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3))
224+
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3))
225225
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %fshl
226226
;
227227
entry:
@@ -261,11 +261,11 @@ declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
261261

262262
define <2 x i64> @fshl_v2i64_3rd_arg_vec_const_all_lanes_same(<2 x i64> %a, <2 x i64> %b) {
263263
; RECIP-LABEL: 'fshl_v2i64_3rd_arg_vec_const_all_lanes_same'
264-
; RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1))
264+
; RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1))
265265
; RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %fshl
266266
;
267267
; SIZE-LABEL: 'fshl_v2i64_3rd_arg_vec_const_all_lanes_same'
268-
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1))
268+
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1))
269269
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %fshl
270270
;
271271
entry:

llvm/test/Analysis/CostModel/AArch64/fshr.ll

+8-8
Original file line numberDiff line numberDiff line change
@@ -129,11 +129,11 @@ declare i19 @llvm.fshr.i19(i19, i19, i19)
129129

130130
define <16 x i8> @fshr_v16i8_3rd_arg_vec_const_all_lanes_same(<16 x i8> %a, <16 x i8> %b) {
131131
; RECIP-LABEL: 'fshr_v16i8_3rd_arg_vec_const_all_lanes_same'
132-
; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3))
132+
; RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3))
133133
; RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %fshr
134134
;
135135
; SIZE-LABEL: 'fshr_v16i8_3rd_arg_vec_const_all_lanes_same'
136-
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3))
136+
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3))
137137
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %fshr
138138
;
139139
entry:
@@ -173,11 +173,11 @@ declare <16 x i8> @llvm.fshr.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
173173

174174
define <8 x i16> @fshr_v8i16_3rd_arg_vec_const_all_lanes_same(<8 x i16> %a, <8 x i16> %b) {
175175
; RECIP-LABEL: 'fshr_v8i16_3rd_arg_vec_const_all_lanes_same'
176-
; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3))
176+
; RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3))
177177
; RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %fshr
178178
;
179179
; SIZE-LABEL: 'fshr_v8i16_3rd_arg_vec_const_all_lanes_same'
180-
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3))
180+
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3))
181181
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %fshr
182182
;
183183
entry:
@@ -217,11 +217,11 @@ declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
217217

218218
define <4 x i32> @fshr_v4i32_3rd_arg_vec_const_all_lanes_same(<4 x i32> %a, <4 x i32> %b) {
219219
; RECIP-LABEL: 'fshr_v4i32_3rd_arg_vec_const_all_lanes_same'
220-
; RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3))
220+
; RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3))
221221
; RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %fshr
222222
;
223223
; SIZE-LABEL: 'fshr_v4i32_3rd_arg_vec_const_all_lanes_same'
224-
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3))
224+
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3))
225225
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %fshr
226226
;
227227
entry:
@@ -261,11 +261,11 @@ declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
261261

262262
define <2 x i64> @fshr_v2i64_3rd_arg_vec_const_all_lanes_same(<2 x i64> %a, <2 x i64> %b) {
263263
; RECIP-LABEL: 'fshr_v2i64_3rd_arg_vec_const_all_lanes_same'
264-
; RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1))
264+
; RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1))
265265
; RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %fshr
266266
;
267267
; SIZE-LABEL: 'fshr_v2i64_3rd_arg_vec_const_all_lanes_same'
268-
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1))
268+
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1))
269269
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %fshr
270270
;
271271
entry:

0 commit comments

Comments
 (0)