Skip to content

Commit 707444e

Browse files
committed
Support AVX10.2-SATCVT new instructions.
1 parent a060597 commit 707444e

25 files changed, with 13,466 additions and 3 deletions.

clang/include/clang/Basic/BuiltinsX86.def

Lines changed: 36 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2022,6 +2022,42 @@ TARGET_BUILTIN(__builtin_ia32_vsm4key4256, "V8UiV8UiV8Ui", "nV:256:", "sm4")
20222022
TARGET_BUILTIN(__builtin_ia32_vsm4rnds4128, "V4UiV4UiV4Ui", "nV:128:", "sm4")
20232023
TARGET_BUILTIN(__builtin_ia32_vsm4rnds4256, "V8UiV8UiV8Ui", "nV:256:", "sm4")
20242024

2025+
TARGET_BUILTIN(__builtin_ia32_vcvtnebf162ibs128, "V8UsV8y", "nV:128:", "avx10.2-256")
2026+
TARGET_BUILTIN(__builtin_ia32_vcvtnebf162ibs256, "V16UsV16y", "nV:256:", "avx10.2-256")
2027+
TARGET_BUILTIN(__builtin_ia32_vcvtnebf162ibs512, "V32UsV32y", "nV:512:", "avx10.2-512")
2028+
TARGET_BUILTIN(__builtin_ia32_vcvtnebf162iubs128, "V8UsV8y", "nV:128:", "avx10.2-256")
2029+
TARGET_BUILTIN(__builtin_ia32_vcvtnebf162iubs256, "V16UsV16y", "nV:256:", "avx10.2-256")
2030+
TARGET_BUILTIN(__builtin_ia32_vcvtnebf162iubs512, "V32UsV32y", "nV:512:", "avx10.2-512")
2031+
TARGET_BUILTIN(__builtin_ia32_vcvtph2ibs128_mask, "V8UsV8xV8UsUc", "nV:128:", "avx10.2-256")
2032+
TARGET_BUILTIN(__builtin_ia32_vcvtph2ibs256_mask, "V16UsV16xV16UsUsIi", "nV:256:", "avx10.2-256")
2033+
TARGET_BUILTIN(__builtin_ia32_vcvtph2ibs512_mask, "V32UsV32xV32UsUiIi", "nV:512:", "avx10.2-512")
2034+
TARGET_BUILTIN(__builtin_ia32_vcvtph2iubs128_mask, "V8UsV8xV8UsUc", "nV:128:", "avx10.2-256")
2035+
TARGET_BUILTIN(__builtin_ia32_vcvtph2iubs256_mask, "V16UsV16xV16UsUsIi", "nV:256:", "avx10.2-256")
2036+
TARGET_BUILTIN(__builtin_ia32_vcvtph2iubs512_mask, "V32UsV32xV32UsUiIi", "nV:512:", "avx10.2-512")
2037+
TARGET_BUILTIN(__builtin_ia32_vcvtps2ibs128_mask, "V4UiV4fV4UiUc", "nV:128:", "avx10.2-256")
2038+
TARGET_BUILTIN(__builtin_ia32_vcvtps2ibs256_mask, "V8UiV8fV8UiUcIi", "nV:256:", "avx10.2-256")
2039+
TARGET_BUILTIN(__builtin_ia32_vcvtps2ibs512_mask, "V16UiV16fV16UiUsIi", "nV:512:", "avx10.2-512")
2040+
TARGET_BUILTIN(__builtin_ia32_vcvtps2iubs128_mask, "V4UiV4fV4UiUc", "nV:128:", "avx10.2-256")
2041+
TARGET_BUILTIN(__builtin_ia32_vcvtps2iubs256_mask, "V8UiV8fV8UiUcIi", "nV:256:", "avx10.2-256")
2042+
TARGET_BUILTIN(__builtin_ia32_vcvtps2iubs512_mask, "V16UiV16fV16UiUsIi", "nV:512:", "avx10.2-512")
2043+
TARGET_BUILTIN(__builtin_ia32_vcvttnebf162ibs128, "V8UsV8y", "nV:128:", "avx10.2-256")
2044+
TARGET_BUILTIN(__builtin_ia32_vcvttnebf162ibs256, "V16UsV16y", "nV:256:", "avx10.2-256")
2045+
TARGET_BUILTIN(__builtin_ia32_vcvttnebf162ibs512, "V32UsV32y", "nV:512:", "avx10.2-512")
2046+
TARGET_BUILTIN(__builtin_ia32_vcvttnebf162iubs128, "V8UsV8y", "nV:128:", "avx10.2-256")
2047+
TARGET_BUILTIN(__builtin_ia32_vcvttnebf162iubs256, "V16UsV16y", "nV:256:", "avx10.2-256")
2048+
TARGET_BUILTIN(__builtin_ia32_vcvttnebf162iubs512, "V32UsV32y", "nV:512:", "avx10.2-512")
2049+
TARGET_BUILTIN(__builtin_ia32_vcvttph2ibs128_mask, "V8UsV8xV8UsUc", "nV:128:", "avx10.2-256")
2050+
TARGET_BUILTIN(__builtin_ia32_vcvttph2ibs256_mask, "V16UsV16xV16UsUsIi", "nV:256:", "avx10.2-256")
2051+
TARGET_BUILTIN(__builtin_ia32_vcvttph2ibs512_mask, "V32UsV32xV32UsUiIi", "nV:512:", "avx10.2-512")
2052+
TARGET_BUILTIN(__builtin_ia32_vcvttph2iubs128_mask, "V8UsV8xV8UsUc", "nV:128:", "avx10.2-256")
2053+
TARGET_BUILTIN(__builtin_ia32_vcvttph2iubs256_mask, "V16UsV16xV16UsUsIi", "nV:256:", "avx10.2-256")
2054+
TARGET_BUILTIN(__builtin_ia32_vcvttph2iubs512_mask, "V32UsV32xV32UsUiIi", "nV:512:", "avx10.2-512")
2055+
TARGET_BUILTIN(__builtin_ia32_vcvttps2ibs128_mask, "V4UiV4fV4UiUc", "nV:128:", "avx10.2-256")
2056+
TARGET_BUILTIN(__builtin_ia32_vcvttps2ibs256_mask, "V8UiV8fV8UiUcIi", "nV:256:", "avx10.2-256")
2057+
TARGET_BUILTIN(__builtin_ia32_vcvttps2ibs512_mask, "V16UiV16fV16UiUsIi", "nV:512:", "avx10.2-512")
2058+
TARGET_BUILTIN(__builtin_ia32_vcvttps2iubs128_mask, "V4UiV4fV4UiUc", "nV:128:", "avx10.2-256")
2059+
TARGET_BUILTIN(__builtin_ia32_vcvttps2iubs256_mask, "V8UiV8fV8UiUcIi", "nV:256:", "avx10.2-256")
2060+
TARGET_BUILTIN(__builtin_ia32_vcvttps2iubs512_mask, "V16UiV16fV16UiUsIi", "nV:512:", "avx10.2-512")
20252061
#undef BUILTIN
20262062
#undef TARGET_BUILTIN
20272063
#undef TARGET_HEADER_BUILTIN

clang/lib/Headers/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -147,7 +147,9 @@ set(x86_files
147147
amxcomplexintrin.h
148148
amxfp16intrin.h
149149
amxintrin.h
150+
avx10_2_512satcvtintrin.h
150151
avx10_2_512niintrin.h
152+
avx10_2satcvtintrin.h
151153
avx10_2niintrin.h
152154
avx2intrin.h
153155
avx512bf16intrin.h

clang/lib/Headers/avx10_2_512satcvtintrin.h

Lines changed: 327 additions & 0 deletions
Large diffs are not rendered by default.

clang/lib/Headers/avx10_2satcvtintrin.h

Lines changed: 448 additions & 0 deletions
Large diffs are not rendered by default.

clang/lib/Headers/immintrin.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -650,10 +650,12 @@ _storebe_i64(void * __P, long long __D) {
650650

651651
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2__)
652652
#include <avx10_2niintrin.h>
653+
#include <avx10_2satcvtintrin.h>
653654
#endif
654655

655656
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2_512__)
656657
#include <avx10_2_512niintrin.h>
658+
#include <avx10_2_512satcvtintrin.h>
657659
#endif
658660

659661
#if !defined(__SCE__) || __has_feature(modules) || defined(__ENQCMD__)

clang/lib/Sema/SemaX86.cpp

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -79,6 +79,14 @@ bool SemaX86::CheckBuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
7979
case X86::BI__builtin_ia32_vcomiss:
8080
case X86::BI__builtin_ia32_vcomish:
8181
case X86::BI__builtin_ia32_vcvtph2ps512_mask:
82+
case X86::BI__builtin_ia32_vcvttph2ibs256_mask:
83+
case X86::BI__builtin_ia32_vcvttph2iubs256_mask:
84+
case X86::BI__builtin_ia32_vcvttps2ibs256_mask:
85+
case X86::BI__builtin_ia32_vcvttps2iubs256_mask:
86+
case X86::BI__builtin_ia32_vcvttph2ibs512_mask:
87+
case X86::BI__builtin_ia32_vcvttph2iubs512_mask:
88+
case X86::BI__builtin_ia32_vcvttps2ibs512_mask:
89+
case X86::BI__builtin_ia32_vcvttps2iubs512_mask:
8290
ArgNum = 3;
8391
break;
8492
case X86::BI__builtin_ia32_cmppd512_mask:
@@ -207,6 +215,14 @@ bool SemaX86::CheckBuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
207215
case X86::BI__builtin_ia32_vcvtph2uqq512_mask:
208216
case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
209217
case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
218+
case X86::BI__builtin_ia32_vcvtph2ibs256_mask:
219+
case X86::BI__builtin_ia32_vcvtph2iubs256_mask:
220+
case X86::BI__builtin_ia32_vcvtps2ibs256_mask:
221+
case X86::BI__builtin_ia32_vcvtps2iubs256_mask:
222+
case X86::BI__builtin_ia32_vcvtph2ibs512_mask:
223+
case X86::BI__builtin_ia32_vcvtph2iubs512_mask:
224+
case X86::BI__builtin_ia32_vcvtps2ibs512_mask:
225+
case X86::BI__builtin_ia32_vcvtps2iubs512_mask:
210226
ArgNum = 3;
211227
HasRC = true;
212228
break;
Lines changed: 198 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,198 @@
1+
// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.2-512 \
2+
// RUN: -emit-llvm -Wall -Werror -verify
3+
// RUN: %clang_cc1 %s -ffreestanding -triple=i386-unknown-unknown -target-feature +avx10.2-512 \
4+
// RUN: -emit-llvm -Wall -Werror -verify
5+
6+
#include <immintrin.h>
7+
8+
__m512i test_mm512_ipcvt_roundph_epi8(__m512h __A) {
9+
return _mm512_ipcvt_roundph_epi8(__A, 22); // expected-error {{invalid rounding argument}}
10+
}
11+
12+
__m512i test_mm512_mask_ipcvt_roundph_epi8(__m512i __S, __mmask32 __A, __m512h __B) {
13+
return _mm512_mask_ipcvt_roundph_epi8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}}
14+
}
15+
16+
__m512i test_mm512_maskz_ipcvt_roundph_epi8(__mmask32 __A, __m512h __B) {
17+
return _mm512_maskz_ipcvt_roundph_epi8(__A, __B, 22); // expected-error {{invalid rounding argument}}
18+
}
19+
20+
__m512i test_mm512_ipcvt_roundph_epu8(__m512h __A) {
21+
return _mm512_ipcvt_roundph_epu8(__A, 22); // expected-error {{invalid rounding argument}}
22+
}
23+
24+
__m512i test_mm512_mask_ipcvt_roundph_epu8(__m512i __S, __mmask32 __A, __m512h __B) {
25+
return _mm512_mask_ipcvt_roundph_epu8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}}
26+
}
27+
28+
__m512i test_mm512_maskz_ipcvt_roundph_epu8(__mmask32 __A, __m512h __B) {
29+
return _mm512_maskz_ipcvt_roundph_epu8(__A, __B, 22); // expected-error {{invalid rounding argument}}
30+
}
31+
32+
__m512i test_mm512_ipcvt_roundps_epi8(__m512 __A) {
33+
return _mm512_ipcvt_roundps_epi8(__A, 22); // expected-error {{invalid rounding argument}}
34+
}
35+
36+
__m512i test_mm512_mask_ipcvt_roundps_epi8(__m512i __S, __mmask16 __A, __m512 __B) {
37+
return _mm512_mask_ipcvt_roundps_epi8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}}
38+
}
39+
40+
__m512i test_mm512_maskz_ipcvt_roundps_epi8(__mmask16 __A, __m512 __B) {
41+
return _mm512_maskz_ipcvt_roundps_epi8(__A, __B, 22); // expected-error {{invalid rounding argument}}
42+
}
43+
44+
__m512i test_mm512_ipcvt_roundps_epu8(__m512 __A) {
45+
return _mm512_ipcvt_roundps_epu8(__A, 22); // expected-error {{invalid rounding argument}}
46+
}
47+
48+
__m512i test_mm512_mask_ipcvt_roundps_epu8(__m512i __S, __mmask16 __A, __m512 __B) {
49+
return _mm512_mask_ipcvt_roundps_epu8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}}
50+
}
51+
52+
__m512i test_mm512_maskz_ipcvt_roundps_epu8(__mmask16 __A, __m512 __B) {
53+
return _mm512_maskz_ipcvt_roundps_epu8(__A, __B, 22); // expected-error {{invalid rounding argument}}
54+
}
55+
56+
__m512i test_mm512_ipcvtt_roundph_epi8(__m512h __A) {
57+
return _mm512_ipcvtt_roundph_epi8(__A, 22); // expected-error {{invalid rounding argument}}
58+
}
59+
60+
__m512i test_mm512_mask_ipcvtt_roundph_epi8(__m512i __S, __mmask32 __A, __m512h __B) {
61+
return _mm512_mask_ipcvtt_roundph_epi8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}}
62+
}
63+
64+
__m512i test_mm512_maskz_ipcvtt_roundph_epi8(__mmask32 __A, __m512h __B) {
65+
return _mm512_maskz_ipcvtt_roundph_epi8(__A, __B, 22); // expected-error {{invalid rounding argument}}
66+
}
67+
68+
__m512i test_mm512_ipcvtt_roundph_epu8(__m512h __A) {
69+
return _mm512_ipcvtt_roundph_epu8(__A, 22); // expected-error {{invalid rounding argument}}
70+
}
71+
72+
__m512i test_mm512_mask_ipcvtt_roundph_epu8(__m512i __S, __mmask32 __A, __m512h __B) {
73+
return _mm512_mask_ipcvtt_roundph_epu8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}}
74+
}
75+
76+
__m512i test_mm512_maskz_ipcvtt_roundph_epu8(__mmask32 __A, __m512h __B) {
77+
return _mm512_maskz_ipcvtt_roundph_epu8(__A, __B, 22); // expected-error {{invalid rounding argument}}
78+
}
79+
80+
__m512i test_mm512_ipcvtt_roundps_epi8(__m512 __A) {
81+
return _mm512_ipcvtt_roundps_epi8(__A, 22); // expected-error {{invalid rounding argument}}
82+
}
83+
84+
__m512i test_mm512_mask_ipcvtt_roundps_epi8(__m512i __S, __mmask16 __A, __m512 __B) {
85+
return _mm512_mask_ipcvtt_roundps_epi8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}}
86+
}
87+
88+
__m512i test_mm512_maskz_ipcvtt_roundps_epi8(__mmask16 __A, __m512 __B) {
89+
return _mm512_maskz_ipcvtt_roundps_epi8(__A, __B, 22); // expected-error {{invalid rounding argument}}
90+
}
91+
92+
__m512i test_mm512_ipcvtt_roundps_epu8(__m512 __A) {
93+
return _mm512_ipcvtt_roundps_epu8(__A, 22); // expected-error {{invalid rounding argument}}
94+
}
95+
96+
__m512i test_mm512_mask_ipcvtt_roundps_epu8(__m512i __S, __mmask16 __A, __m512 __B) {
97+
return _mm512_mask_ipcvtt_roundps_epu8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}}
98+
}
99+
100+
__m512i test_mm512_maskz_ipcvtt_roundps_epu8(__mmask16 __A, __m512 __B) {
101+
return _mm512_maskz_ipcvtt_roundps_epu8(__A, __B, 22); // expected-error {{invalid rounding argument}}
102+
}
103+
104+
__m256i test_mm256_ipcvt_roundph_epi8(__m256h __A) {
105+
return _mm256_ipcvt_roundph_epi8(__A, 22); // expected-error {{invalid rounding argument}}
106+
}
107+
108+
__m256i test_mm256_mask_ipcvt_roundph_epi8(__m256i __S, __mmask16 __A, __m256h __B) {
109+
return _mm256_mask_ipcvt_roundph_epi8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}}
110+
}
111+
112+
__m256i test_mm256_maskz_ipcvt_roundph_epi8(__mmask16 __A, __m256h __B) {
113+
return _mm256_maskz_ipcvt_roundph_epi8(__A, __B, 22); // expected-error {{invalid rounding argument}}
114+
}
115+
116+
__m256i test_mm256_ipcvt_roundph_epu8(__m256h __A) {
117+
return _mm256_ipcvt_roundph_epu8(__A, 22); // expected-error {{invalid rounding argument}}
118+
}
119+
120+
__m256i test_mm256_mask_ipcvt_roundph_epu8(__m256i __S, __mmask16 __A, __m256h __B) {
121+
return _mm256_mask_ipcvt_roundph_epu8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}}
122+
}
123+
124+
__m256i test_mm256_maskz_ipcvt_roundph_epu8(__mmask16 __A, __m256h __B) {
125+
return _mm256_maskz_ipcvt_roundph_epu8(__A, __B, 22); // expected-error {{invalid rounding argument}}
126+
}
127+
128+
__m256i test_mm256_ipcvt_roundps_epi8(__m256 __A) {
129+
return _mm256_ipcvt_roundps_epi8(__A, 22); // expected-error {{invalid rounding argument}}
130+
}
131+
132+
__m256i test_mm256_mask_ipcvt_roundps_epi8(__m256i __S, __mmask8 __A, __m256 __B) {
133+
return _mm256_mask_ipcvt_roundps_epi8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}}
134+
}
135+
136+
__m256i test_mm256_maskz_ipcvt_roundps_epi8(__mmask8 __A, __m256 __B) {
137+
return _mm256_maskz_ipcvt_roundps_epi8(__A, __B, 22); // expected-error {{invalid rounding argument}}
138+
}
139+
140+
__m256i test_mm256_ipcvt_roundps_epu8(__m256 __A) {
141+
return _mm256_ipcvt_roundps_epu8(__A, 22); // expected-error {{invalid rounding argument}}
142+
}
143+
144+
__m256i test_mm256_mask_ipcvt_roundps_epu8(__m256i __S, __mmask8 __A, __m256 __B) {
145+
return _mm256_mask_ipcvt_roundps_epu8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}}
146+
}
147+
148+
__m256i test_mm256_maskz_ipcvt_roundps_epu8(__mmask8 __A, __m256 __B) {
149+
return _mm256_maskz_ipcvt_roundps_epu8(__A, __B, 22); // expected-error {{invalid rounding argument}}
150+
}
151+
152+
__m256i test_mm256_ipcvtt_roundph_epi8(__m256h __A) {
153+
return _mm256_ipcvtt_roundph_epi8(__A, 22); // expected-error {{invalid rounding argument}}
154+
}
155+
156+
__m256i test_mm256_mask_ipcvtt_roundph_epi8(__m256i __S, __mmask16 __A, __m256h __B) {
157+
return _mm256_mask_ipcvtt_roundph_epi8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}}
158+
}
159+
160+
__m256i test_mm256_maskz_ipcvtt_roundph_epi8(__mmask16 __A, __m256h __B) {
161+
return _mm256_maskz_ipcvtt_roundph_epi8(__A, __B, 22); // expected-error {{invalid rounding argument}}
162+
}
163+
164+
__m256i test_mm256_ipcvtt_roundph_epu8(__m256h __A) {
165+
return _mm256_ipcvtt_roundph_epu8(__A, 22); // expected-error {{invalid rounding argument}}
166+
}
167+
168+
__m256i test_mm256_mask_ipcvtt_roundph_epu8(__m256i __S, __mmask16 __A, __m256h __B) {
169+
return _mm256_mask_ipcvtt_roundph_epu8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}}
170+
}
171+
172+
__m256i test_mm256_maskz_ipcvtt_roundph_epu8(__mmask16 __A, __m256h __B) {
173+
return _mm256_maskz_ipcvtt_roundph_epu8(__A, __B, 22); // expected-error {{invalid rounding argument}}
174+
}
175+
176+
__m256i test_mm256_ipcvtt_roundps_epi8(__m256 __A) {
177+
return _mm256_ipcvtt_roundps_epi8(__A, 22); // expected-error {{invalid rounding argument}}
178+
}
179+
180+
__m256i test_mm256_mask_ipcvtt_roundps_epi8(__m256i __S, __mmask8 __A, __m256 __B) {
181+
return _mm256_mask_ipcvtt_roundps_epi8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}}
182+
}
183+
184+
__m256i test_mm256_maskz_ipcvtt_roundps_epi8(__mmask8 __A, __m256 __B) {
185+
return _mm256_maskz_ipcvtt_roundps_epi8(__A, __B, 22); // expected-error {{invalid rounding argument}}
186+
}
187+
188+
__m256i test_mm256_ipcvtt_roundps_epu8(__m256 __A) {
189+
return _mm256_ipcvtt_roundps_epu8(__A, 22); // expected-error {{invalid rounding argument}}
190+
}
191+
192+
__m256i test_mm256_mask_ipcvtt_roundps_epu8(__m256i __S, __mmask8 __A, __m256 __B) {
193+
return _mm256_mask_ipcvtt_roundps_epu8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}}
194+
}
195+
196+
__m256i test_mm256_maskz_ipcvtt_roundps_epu8(__mmask8 __A, __m256 __B) {
197+
return _mm256_maskz_ipcvtt_roundps_epu8(__A, __B, 22); // expected-error {{invalid rounding argument}}
198+
}

0 commit comments

Comments (0)