
Commit b2b68c2

[CostModel][X86] Add add/sub sat intrinsic costs
Fixes regressions from #97463 caused by missing costs for custom-lowered ops.
1 parent de792d6 commit b2b68c2
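
The new costs land in X86TargetTransformInfo and are exercised by the CostModel tests listed below, which run opt's print<cost-model> pass over IR that calls the saturating add/sub intrinsics. A minimal sketch of such a query follows; the file name, triple, attributes and cost-kind value are illustrative and not copied from the commit's RUN lines:

; Print per-instruction cost estimates, including the saturating-add call:
;   opt -passes="print<cost-model>" -disable-output 2>&1 \
;       -mtriple=x86_64-- -mattr=+sse2 -cost-kind=throughput sadd_sat.ll
define <2 x i64> @sadd_sat_v2i64(<2 x i64> %a, <2 x i64> %b) {
  %r = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %r
}
declare <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64>, <2 x i64>)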

13 files changed: +2532 -2449 lines changed

llvm/lib/Target/X86/X86TargetTransformInfo.cpp: 126 additions & 58 deletions
llvm/test/Analysis/CostModel/X86/arith-ssat-codesize.ll: 289 additions & 289 deletions
llvm/test/Analysis/CostModel/X86/arith-ssat-latency.ll: 350 additions & 350 deletions
llvm/test/Analysis/CostModel/X86/arith-ssat-sizelatency.ll: 328 additions & 328 deletions
llvm/test/Analysis/CostModel/X86/arith-ssat.ll: 159 additions & 159 deletions
llvm/test/Analysis/CostModel/X86/arith-usat-codesize.ll: 348 additions & 348 deletions
llvm/test/Analysis/CostModel/X86/arith-usat-latency.ll: 252 additions & 252 deletions
llvm/test/Analysis/CostModel/X86/arith-usat-sizelatency.ll: 342 additions & 342 deletions
llvm/test/Analysis/CostModel/X86/arith-usat.ll: 156 additions & 156 deletions
Large diffs for the files above are not rendered by default.

llvm/test/Transforms/SLPVectorizer/X86/arith-add-ssat.ll: 64 additions & 64 deletions
@@ -26,70 +26,70 @@ declare i16 @llvm.sadd.sat.i16(i16, i16)
 declare i8 @llvm.sadd.sat.i8 (i8 , i8 )
 
 define void @add_v8i64() {
-; SSE-LABEL: @add_v8i64(
-; SSE-NEXT: [[A0:%.*]] = load i64, ptr @a64, align 8
-; SSE-NEXT: [[A1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
-; SSE-NEXT: [[A2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
-; SSE-NEXT: [[A3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
-; SSE-NEXT: [[A4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
-; SSE-NEXT: [[A5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
-; SSE-NEXT: [[A6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
-; SSE-NEXT: [[A7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
-; SSE-NEXT: [[B0:%.*]] = load i64, ptr @b64, align 8
-; SSE-NEXT: [[B1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
-; SSE-NEXT: [[B2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
-; SSE-NEXT: [[B3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
-; SSE-NEXT: [[B4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
-; SSE-NEXT: [[B5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
-; SSE-NEXT: [[B6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
-; SSE-NEXT: [[B7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
-; SSE-NEXT: [[R0:%.*]] = call i64 @llvm.sadd.sat.i64(i64 [[A0]], i64 [[B0]])
-; SSE-NEXT: [[R1:%.*]] = call i64 @llvm.sadd.sat.i64(i64 [[A1]], i64 [[B1]])
-; SSE-NEXT: [[R2:%.*]] = call i64 @llvm.sadd.sat.i64(i64 [[A2]], i64 [[B2]])
-; SSE-NEXT: [[R3:%.*]] = call i64 @llvm.sadd.sat.i64(i64 [[A3]], i64 [[B3]])
-; SSE-NEXT: [[R4:%.*]] = call i64 @llvm.sadd.sat.i64(i64 [[A4]], i64 [[B4]])
-; SSE-NEXT: [[R5:%.*]] = call i64 @llvm.sadd.sat.i64(i64 [[A5]], i64 [[B5]])
-; SSE-NEXT: [[R6:%.*]] = call i64 @llvm.sadd.sat.i64(i64 [[A6]], i64 [[B6]])
-; SSE-NEXT: [[R7:%.*]] = call i64 @llvm.sadd.sat.i64(i64 [[A7]], i64 [[B7]])
-; SSE-NEXT: store i64 [[R0]], ptr @c64, align 8
-; SSE-NEXT: store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
-; SSE-NEXT: store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
-; SSE-NEXT: store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
-; SSE-NEXT: store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
-; SSE-NEXT: store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
-; SSE-NEXT: store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
-; SSE-NEXT: store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
-; SSE-NEXT: ret void
+; SSE2-LABEL: @add_v8i64(
+; SSE2-NEXT: [[A0:%.*]] = load i64, ptr @a64, align 8
+; SSE2-NEXT: [[A1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+; SSE2-NEXT: [[A2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; SSE2-NEXT: [[A3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+; SSE2-NEXT: [[A4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; SSE2-NEXT: [[A5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+; SSE2-NEXT: [[A6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; SSE2-NEXT: [[A7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+; SSE2-NEXT: [[B0:%.*]] = load i64, ptr @b64, align 8
+; SSE2-NEXT: [[B1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+; SSE2-NEXT: [[B2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+; SSE2-NEXT: [[B3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+; SSE2-NEXT: [[B4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+; SSE2-NEXT: [[B5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+; SSE2-NEXT: [[B6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+; SSE2-NEXT: [[B7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
+; SSE2-NEXT: [[R0:%.*]] = call i64 @llvm.sadd.sat.i64(i64 [[A0]], i64 [[B0]])
+; SSE2-NEXT: [[R1:%.*]] = call i64 @llvm.sadd.sat.i64(i64 [[A1]], i64 [[B1]])
+; SSE2-NEXT: [[R2:%.*]] = call i64 @llvm.sadd.sat.i64(i64 [[A2]], i64 [[B2]])
+; SSE2-NEXT: [[R3:%.*]] = call i64 @llvm.sadd.sat.i64(i64 [[A3]], i64 [[B3]])
+; SSE2-NEXT: [[R4:%.*]] = call i64 @llvm.sadd.sat.i64(i64 [[A4]], i64 [[B4]])
+; SSE2-NEXT: [[R5:%.*]] = call i64 @llvm.sadd.sat.i64(i64 [[A5]], i64 [[B5]])
+; SSE2-NEXT: [[R6:%.*]] = call i64 @llvm.sadd.sat.i64(i64 [[A6]], i64 [[B6]])
+; SSE2-NEXT: [[R7:%.*]] = call i64 @llvm.sadd.sat.i64(i64 [[A7]], i64 [[B7]])
+; SSE2-NEXT: store i64 [[R0]], ptr @c64, align 8
+; SSE2-NEXT: store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+; SSE2-NEXT: store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; SSE2-NEXT: store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+; SSE2-NEXT: store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; SSE2-NEXT: store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+; SSE2-NEXT: store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+; SSE2-NEXT: store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
+; SSE2-NEXT: ret void
 ;
-; AVX1-LABEL: @add_v8i64(
-; AVX1-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8
-; AVX1-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8
-; AVX1-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
-; AVX1-NEXT: store <2 x i64> [[TMP3]], ptr @c64, align 8
-; AVX1-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
-; AVX1-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
-; AVX1-NEXT: [[TMP6:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]])
-; AVX1-NEXT: store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
-; AVX1-NEXT: [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
-; AVX1-NEXT: [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
-; AVX1-NEXT: [[TMP9:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]])
-; AVX1-NEXT: store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
-; AVX1-NEXT: [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
-; AVX1-NEXT: [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
-; AVX1-NEXT: [[TMP12:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]])
-; AVX1-NEXT: store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
-; AVX1-NEXT: ret void
+; SLM-LABEL: @add_v8i64(
+; SLM-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8
+; SLM-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8
+; SLM-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
+; SLM-NEXT: store <2 x i64> [[TMP3]], ptr @c64, align 8
+; SLM-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; SLM-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+; SLM-NEXT: [[TMP6:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]])
+; SLM-NEXT: store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; SLM-NEXT: [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; SLM-NEXT: [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+; SLM-NEXT: [[TMP9:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]])
+; SLM-NEXT: store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; SLM-NEXT: [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; SLM-NEXT: [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+; SLM-NEXT: [[TMP12:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]])
+; SLM-NEXT: store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+; SLM-NEXT: ret void
 ;
-; AVX2-LABEL: @add_v8i64(
-; AVX2-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8
-; AVX2-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr @b64, align 8
-; AVX2-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> [[TMP1]], <4 x i64> [[TMP2]])
-; AVX2-NEXT: store <4 x i64> [[TMP3]], ptr @c64, align 8
-; AVX2-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
-; AVX2-NEXT: [[TMP5:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
-; AVX2-NEXT: [[TMP6:%.*]] = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> [[TMP4]], <4 x i64> [[TMP5]])
-; AVX2-NEXT: store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
-; AVX2-NEXT: ret void
+; AVX-LABEL: @add_v8i64(
+; AVX-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8
+; AVX-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr @b64, align 8
+; AVX-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> [[TMP1]], <4 x i64> [[TMP2]])
+; AVX-NEXT: store <4 x i64> [[TMP3]], ptr @c64, align 8
+; AVX-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; AVX-NEXT: [[TMP5:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+; AVX-NEXT: [[TMP6:%.*]] = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> [[TMP4]], <4 x i64> [[TMP5]])
+; AVX-NEXT: store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; AVX-NEXT: ret void
 ;
 ; AVX512-LABEL: @add_v8i64(
 ; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @a64, align 8
@@ -714,5 +714,5 @@ define void @add_v64i8() {
   ret void
 }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; SLM: {{.*}}
-; SSE2: {{.*}}
+; AVX1: {{.*}}
+; AVX2: {{.*}}
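
The regrouped check prefixes above track the per-subtarget vectorization decisions that now follow from the new intrinsic costs: the plain SSE2 run keeps @llvm.sadd.sat.i64 scalar, while the SLM and AVX runs vectorize to <2 x i64> and <4 x i64> respectively. A reduced, hypothetical reproducer follows; it is not part of the test file, the opt invocation shown is just the usual way to drive the SLP vectorizer standalone, and whether the pair below is merged into a single @llvm.sadd.sat.v2i64 call depends on the costs for the chosen -mattr/-mcpu:

; Try e.g.:  opt -S -passes=slp-vectorizer -mtriple=x86_64-- -mattr=+avx pair.ll
define void @sadd_sat_pair(ptr %a, ptr %b, ptr %c) {
  %a0 = load i64, ptr %a, align 8
  %a1.ptr = getelementptr inbounds i64, ptr %a, i64 1
  %a1 = load i64, ptr %a1.ptr, align 8
  %b0 = load i64, ptr %b, align 8
  %b1.ptr = getelementptr inbounds i64, ptr %b, i64 1
  %b1 = load i64, ptr %b1.ptr, align 8
  ; Two adjacent scalar saturating adds: a candidate 2-wide SLP tree.
  %r0 = call i64 @llvm.sadd.sat.i64(i64 %a0, i64 %b0)
  %r1 = call i64 @llvm.sadd.sat.i64(i64 %a1, i64 %b1)
  store i64 %r0, ptr %c, align 8
  %c1.ptr = getelementptr inbounds i64, ptr %c, i64 1
  store i64 %r1, ptr %c1.ptr, align 8
  ret void
}
declare i64 @llvm.sadd.sat.i64(i64, i64)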
