-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[X86][AVX10.2] Support AVX10.2-SATCVT new instructions. #101599
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
You can test this locally with the following command:
git-clang-format --diff 3d5cc7e1e632b74119af13824dabc346bd248c93 59df2576f81d5741271bc7329fade564b83d492d --extensions cpp,inc,c,h -- clang/lib/Headers/avx10_2_512satcvtintrin.h clang/lib/Headers/avx10_2satcvtintrin.h clang/test/CodeGen/X86/avx10_2_512satcvt-builtins-error.c clang/test/CodeGen/X86/avx10_2_512satcvt-builtins.c clang/test/CodeGen/X86/avx10_2satcvt-builtins.c clang/lib/Headers/immintrin.h clang/lib/Sema/SemaX86.cpp llvm/lib/Target/X86/X86ISelLowering.cpp llvm/lib/Target/X86/X86ISelLowering.h llvm/lib/Target/X86/X86IntrinsicsInfo.h llvm/test/TableGen/x86-fold-tables.inc
View the diff from clang-format here.
diff --git a/clang/lib/Headers/avx10_2_512satcvtintrin.h b/clang/lib/Headers/avx10_2_512satcvtintrin.h
index 0dadadb6a0..67b2d5a421 100644
--- a/clang/lib/Headers/avx10_2_512satcvtintrin.h
+++ b/clang/lib/Headers/avx10_2_512satcvtintrin.h
@@ -68,7 +68,7 @@
#define _mm512_ipcvtph_epi8(A) \
((__m512i)__builtin_ia32_vcvtph2ibs512_mask( \
- (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \
+ (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
_MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_ipcvtph_epi8(W, U, A) \
@@ -84,7 +84,7 @@
#define _mm512_ipcvt_roundph_epi8(A, R) \
((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A), \
(__v32hu)_mm512_setzero_si512(), \
- (__mmask32)-1, (const int)R))
+ (__mmask32) - 1, (const int)R))
#define _mm512_mask_ipcvt_roundph_epi8(W, U, A, R) \
((__m512i)__builtin_ia32_vcvtph2ibs512_mask( \
@@ -97,7 +97,7 @@
#define _mm512_ipcvtph_epu8(A) \
((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \
- (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \
+ (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
_MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_ipcvtph_epu8(W, U, A) \
@@ -112,7 +112,7 @@
#define _mm512_ipcvt_roundph_epu8(A, R) \
((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \
- (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \
+ (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
(const int)R))
#define _mm512_mask_ipcvt_roundph_epu8(W, U, A, R) \
@@ -126,7 +126,7 @@
#define _mm512_ipcvtps_epi8(A) \
((__m512i)__builtin_ia32_vcvtps2ibs512_mask( \
- (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \
+ (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \
_MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_ipcvtps_epi8(W, U, A) \
@@ -142,7 +142,7 @@
#define _mm512_ipcvt_roundps_epi8(A, R) \
((__m512i)__builtin_ia32_vcvtps2ibs512_mask((__v16sf)(__m512)(A), \
(__v16su)_mm512_setzero_si512(), \
- (__mmask16)-1, (const int)R))
+ (__mmask16) - 1, (const int)R))
#define _mm512_mask_ipcvt_roundps_epi8(W, U, A, R) \
((__m512i)__builtin_ia32_vcvtps2ibs512_mask( \
@@ -155,7 +155,7 @@
#define _mm512_ipcvtps_epu8(A) \
((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \
- (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \
+ (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \
_MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_ipcvtps_epu8(W, U, A) \
@@ -170,7 +170,7 @@
#define _mm512_ipcvt_roundps_epu8(A, R) \
((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \
- (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \
+ (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \
(const int)R))
#define _mm512_mask_ipcvt_roundps_epu8(W, U, A, R) \
@@ -184,7 +184,7 @@
#define _mm512_ipcvttph_epi8(A) \
((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \
- (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \
+ (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
_MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_ipcvttph_epi8(W, U, A) \
@@ -199,7 +199,7 @@
#define _mm512_ipcvtt_roundph_epi8(A, S) \
((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \
- (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \
+ (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
S))
#define _mm512_mask_ipcvtt_roundph_epi8(W, U, A, S) \
@@ -213,7 +213,7 @@
#define _mm512_ipcvttph_epu8(A) \
((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \
- (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \
+ (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
_MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_ipcvttph_epu8(W, U, A) \
@@ -228,7 +228,7 @@
#define _mm512_ipcvtt_roundph_epu8(A, S) \
((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \
- (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \
+ (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
S))
#define _mm512_mask_ipcvtt_roundph_epu8(W, U, A, S) \
@@ -242,7 +242,7 @@
#define _mm512_ipcvttps_epi8(A) \
((__m512i)__builtin_ia32_vcvttps2ibs512_mask( \
- (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \
+ (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \
_MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_ipcvttps_epi8(W, U, A) \
@@ -257,7 +257,7 @@
#define _mm512_ipcvtt_roundps_epi8(A, S) \
((__m512i)__builtin_ia32_vcvttps2ibs512_mask( \
- (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \
+ (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \
S))
#define _mm512_mask_ipcvtt_roundps_epi8(W, U, A, S) \
@@ -271,7 +271,7 @@
#define _mm512_ipcvttps_epu8(A) \
((__m512i)__builtin_ia32_vcvttps2iubs512_mask( \
- (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \
+ (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \
_MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_ipcvttps_epu8(W, U, A) \
@@ -286,7 +286,7 @@
#define _mm512_ipcvtt_roundps_epu8(A, S) \
((__m512i)__builtin_ia32_vcvttps2iubs512_mask( \
- (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \
+ (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \
S))
#define _mm512_mask_ipcvtt_roundps_epu8(W, U, A, S) \
diff --git a/clang/lib/Headers/avx10_2satcvtintrin.h b/clang/lib/Headers/avx10_2satcvtintrin.h
index dd5c44fdaf..878f1adf87 100644
--- a/clang/lib/Headers/avx10_2satcvtintrin.h
+++ b/clang/lib/Headers/avx10_2satcvtintrin.h
@@ -66,7 +66,7 @@
#define _mm_ipcvtph_epi8(A) \
((__m128i)__builtin_ia32_vcvtph2ibs128_mask( \
- (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))
+ (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8) - 1))
#define _mm_mask_ipcvtph_epi8(W, U, A) \
((__m128i)__builtin_ia32_vcvtph2ibs128_mask((__v8hf)(__m128h)(A), \
@@ -78,7 +78,7 @@
#define _mm256_ipcvtph_epi8(A) \
((__m256i)__builtin_ia32_vcvtph2ibs256_mask( \
- (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \
+ (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16) - 1, \
_MM_FROUND_CUR_DIRECTION))
#define _mm256_mask_ipcvtph_epi8(W, U, A) \
@@ -94,7 +94,7 @@
#define _mm256_ipcvt_roundph_epi8(A, R) \
((__m256i)__builtin_ia32_vcvtph2ibs256_mask((__v16hf)(__m256h)(A), \
(__v16hu)_mm256_setzero_si256(), \
- (__mmask16)-1, (const int)R))
+ (__mmask16) - 1, (const int)R))
#define _mm256_mask_ipcvt_roundph_epi8(W, U, A, R) \
((__m256i)__builtin_ia32_vcvtph2ibs256_mask( \
@@ -107,7 +107,7 @@
#define _mm_ipcvtph_epu8(A) \
((__m128i)__builtin_ia32_vcvtph2iubs128_mask( \
- (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))
+ (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8) - 1))
#define _mm_mask_ipcvtph_epu8(W, U, A) \
((__m128i)__builtin_ia32_vcvtph2iubs128_mask((__v8hf)(__m128h)(A), \
@@ -119,7 +119,7 @@
#define _mm256_ipcvtph_epu8(A) \
((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \
- (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \
+ (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16) - 1, \
_MM_FROUND_CUR_DIRECTION))
#define _mm256_mask_ipcvtph_epu8(W, U, A) \
@@ -134,7 +134,7 @@
#define _mm256_ipcvt_roundph_epu8(A, R) \
((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \
- (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \
+ (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16) - 1, \
(const int)R))
#define _mm256_mask_ipcvt_roundph_epu8(W, U, A, R) \
@@ -148,7 +148,7 @@
#define _mm_ipcvtps_epi8(A) \
((__m128i)__builtin_ia32_vcvtps2ibs128_mask( \
- (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))
+ (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8) - 1))
#define _mm_mask_ipcvtps_epi8(W, U, A) \
((__m128i)__builtin_ia32_vcvtps2ibs128_mask((__v4sf)(__m128)(A), \
@@ -160,7 +160,7 @@
#define _mm256_ipcvtps_epi8(A) \
((__m256i)__builtin_ia32_vcvtps2ibs256_mask( \
- (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \
+ (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8) - 1, \
_MM_FROUND_CUR_DIRECTION))
#define _mm256_mask_ipcvtps_epi8(W, U, A) \
@@ -176,7 +176,7 @@
#define _mm256_ipcvt_roundps_epi8(A, R) \
((__m256i)__builtin_ia32_vcvtps2ibs256_mask((__v8sf)(__m256)(A), \
(__v8su)_mm256_setzero_si256(), \
- (__mmask8)-1, (const int)R))
+ (__mmask8) - 1, (const int)R))
#define _mm256_mask_ipcvt_roundps_epi8(W, U, A, R) \
((__m256i)__builtin_ia32_vcvtps2ibs256_mask( \
@@ -189,7 +189,7 @@
#define _mm_ipcvtps_epu8(A) \
((__m128i)__builtin_ia32_vcvtps2iubs128_mask( \
- (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))
+ (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8) - 1))
#define _mm_mask_ipcvtps_epu8(W, U, A) \
((__m128i)__builtin_ia32_vcvtps2iubs128_mask((__v4sf)(__m128)(A), \
@@ -201,7 +201,7 @@
#define _mm256_ipcvtps_epu8(A) \
((__m256i)__builtin_ia32_vcvtps2iubs256_mask( \
- (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \
+ (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8) - 1, \
_MM_FROUND_CUR_DIRECTION))
#define _mm256_mask_ipcvtps_epu8(W, U, A) \
@@ -217,7 +217,7 @@
#define _mm256_ipcvt_roundps_epu8(A, R) \
((__m256i)__builtin_ia32_vcvtps2iubs256_mask((__v8sf)(__m256)(A), \
(__v8su)_mm256_setzero_si256(), \
- (__mmask8)-1, (const int)R))
+ (__mmask8) - 1, (const int)R))
#define _mm256_mask_ipcvt_roundps_epu8(W, U, A, R) \
((__m256i)__builtin_ia32_vcvtps2iubs256_mask( \
@@ -280,7 +280,7 @@
#define _mm_ipcvttph_epi8(A) \
((__m128i)__builtin_ia32_vcvttph2ibs128_mask( \
- (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))
+ (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8) - 1))
#define _mm_mask_ipcvttph_epi8(W, U, A) \
((__m128i)__builtin_ia32_vcvttph2ibs128_mask((__v8hf)(__m128h)(A), \
@@ -292,7 +292,7 @@
#define _mm256_ipcvttph_epi8(A) \
((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \
- (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \
+ (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16) - 1, \
_MM_FROUND_CUR_DIRECTION))
#define _mm256_mask_ipcvttph_epi8(W, U, A) \
@@ -307,7 +307,7 @@
#define _mm256_ipcvtt_roundph_epi8(A, R) \
((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \
- (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \
+ (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16) - 1, \
(const int)R))
#define _mm256_mask_ipcvtt_roundph_epi8(W, U, A, R) \
@@ -321,7 +321,7 @@
#define _mm_ipcvttph_epu8(A) \
((__m128i)__builtin_ia32_vcvttph2iubs128_mask( \
- (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))
+ (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8) - 1))
#define _mm_mask_ipcvttph_epu8(W, U, A) \
((__m128i)__builtin_ia32_vcvttph2iubs128_mask((__v8hf)(__m128h)(A), \
@@ -333,7 +333,7 @@
#define _mm256_ipcvttph_epu8(A) \
((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \
- (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \
+ (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16) - 1, \
_MM_FROUND_CUR_DIRECTION))
#define _mm256_mask_ipcvttph_epu8(W, U, A) \
@@ -348,7 +348,7 @@
#define _mm256_ipcvtt_roundph_epu8(A, R) \
((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \
- (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \
+ (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16) - 1, \
(const int)R))
#define _mm256_mask_ipcvtt_roundph_epu8(W, U, A, R) \
@@ -362,7 +362,7 @@
#define _mm_ipcvttps_epi8(A) \
((__m128i)__builtin_ia32_vcvttps2ibs128_mask( \
- (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))
+ (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8) - 1))
#define _mm_mask_ipcvttps_epi8(W, U, A) \
((__m128i)__builtin_ia32_vcvttps2ibs128_mask((__v4sf)(__m128)(A), \
@@ -374,7 +374,7 @@
#define _mm256_ipcvttps_epi8(A) \
((__m256i)__builtin_ia32_vcvttps2ibs256_mask( \
- (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \
+ (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8) - 1, \
_MM_FROUND_CUR_DIRECTION))
#define _mm256_mask_ipcvttps_epi8(W, U, A) \
@@ -390,7 +390,7 @@
#define _mm256_ipcvtt_roundps_epi8(A, R) \
((__m256i)__builtin_ia32_vcvttps2ibs256_mask((__v8sf)(__m256)(A), \
(__v8su)_mm256_setzero_si256(), \
- (__mmask8)-1, (const int)R))
+ (__mmask8) - 1, (const int)R))
#define _mm256_mask_ipcvtt_roundps_epi8(W, U, A, R) \
((__m256i)__builtin_ia32_vcvttps2ibs256_mask( \
@@ -403,7 +403,7 @@
#define _mm_ipcvttps_epu8(A) \
((__m128i)__builtin_ia32_vcvttps2iubs128_mask( \
- (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))
+ (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8) - 1))
#define _mm_mask_ipcvttps_epu8(W, U, A) \
((__m128i)__builtin_ia32_vcvttps2iubs128_mask((__v4sf)(__m128)(A), \
@@ -415,7 +415,7 @@
#define _mm256_ipcvttps_epu8(A) \
((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \
- (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \
+ (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8) - 1, \
_MM_FROUND_CUR_DIRECTION))
#define _mm256_mask_ipcvttps_epu8(W, U, A) \
@@ -430,7 +430,7 @@
#define _mm256_ipcvtt_roundps_epu8(A, R) \
((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \
- (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \
+ (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8) - 1, \
(const int)R))
#define _mm256_mask_ipcvtt_roundps_epu8(W, U, A, R) \
|
707444e
to
ba3d078
Compare
ba3d078
to
49bda50
Compare
@llvm/pr-subscribers-mc @llvm/pr-subscribers-llvm-ir
Author: Freddy Ye (FreddyLeaf)
Changes
Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965
Patch is 611.58 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/101599.diff
25 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index c49b5c36da4fc..fb55057b8cbc3 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -2158,6 +2158,44 @@ TARGET_BUILTIN(__builtin_ia32_vminmaxps512_round_mask, "V16fV16fV16fIiV16fUsIi",
TARGET_BUILTIN(__builtin_ia32_vminmaxsd_round_mask, "V2dV2dV2dIiV2dUcIi", "nV:128:", "avx10.2-256")
TARGET_BUILTIN(__builtin_ia32_vminmaxsh_round_mask, "V8xV8xV8xIiV8xUcIi", "nV:128:", "avx10.2-256")
TARGET_BUILTIN(__builtin_ia32_vminmaxss_round_mask, "V4fV4fV4fIiV4fUcIi", "nV:128:", "avx10.2-256")
+
+// AVX10.2 SATCVT
+TARGET_BUILTIN(__builtin_ia32_vcvtnebf162ibs128, "V8UsV8y", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtnebf162ibs256, "V16UsV16y", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtnebf162ibs512, "V32UsV32y", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvtnebf162iubs128, "V8UsV8y", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtnebf162iubs256, "V16UsV16y", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtnebf162iubs512, "V32UsV32y", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2ibs128_mask, "V8UsV8xV8UsUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2ibs256_mask, "V16UsV16xV16UsUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2ibs512_mask, "V32UsV32xV32UsUiIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2iubs128_mask, "V8UsV8xV8UsUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2iubs256_mask, "V16UsV16xV16UsUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2iubs512_mask, "V32UsV32xV32UsUiIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2ibs128_mask, "V4UiV4fV4UiUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2ibs256_mask, "V8UiV8fV8UiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2ibs512_mask, "V16UiV16fV16UiUsIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2iubs128_mask, "V4UiV4fV4UiUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2iubs256_mask, "V8UiV8fV8UiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2iubs512_mask, "V16UiV16fV16UiUsIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttnebf162ibs128, "V8UsV8y", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttnebf162ibs256, "V16UsV16y", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttnebf162ibs512, "V32UsV32y", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttnebf162iubs128, "V8UsV8y", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttnebf162iubs256, "V16UsV16y", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttnebf162iubs512, "V32UsV32y", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2ibs128_mask, "V8UsV8xV8UsUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2ibs256_mask, "V16UsV16xV16UsUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2ibs512_mask, "V32UsV32xV32UsUiIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2iubs128_mask, "V8UsV8xV8UsUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2iubs256_mask, "V16UsV16xV16UsUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2iubs512_mask, "V32UsV32xV32UsUiIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2ibs128_mask, "V4UiV4fV4UiUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2ibs256_mask, "V8UiV8fV8UiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2ibs512_mask, "V16UiV16fV16UiUsIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2iubs128_mask, "V4UiV4fV4UiUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2iubs256_mask, "V8UiV8fV8UiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2iubs512_mask, "V16UiV16fV16UiUsIi", "nV:512:", "avx10.2-512")
#undef BUILTIN
#undef TARGET_BUILTIN
#undef TARGET_HEADER_BUILTIN
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index f3d19e38f8f2b..91e106427ba1d 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -149,7 +149,9 @@ set(x86_files
amxintrin.h
avx10_2_512minmaxintrin.h
avx10_2_512niintrin.h
+ avx10_2_512satcvtintrin.h
avx10_2minmaxintrin.h
+ avx10_2satcvtintrin.h
avx10_2niintrin.h
avx2intrin.h
avx512bf16intrin.h
diff --git a/clang/lib/Headers/avx10_2_512satcvtintrin.h b/clang/lib/Headers/avx10_2_512satcvtintrin.h
new file mode 100644
index 0000000000000..0ea645bee22f9
--- /dev/null
+++ b/clang/lib/Headers/avx10_2_512satcvtintrin.h
@@ -0,0 +1,327 @@
+/*===------ avx10_2_512satcvtintrin.h - AVX10_2_512SATCVT intrinsics -------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error \
+ "Never use <avx10_2_512satcvtintrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H
+
+#ifndef __AVX10_2_512SATCVTINTRIN_H
+#define __AVX10_2_512SATCVTINTRIN_H
+
+#define _mm512_ipcvtnebf16_epi8(A) \
+ ((__m512i)__builtin_ia32_vcvtnebf162ibs512((__v32bf)(__m512bh)(A)))
+
+#define _mm512_mask_ipcvtnebf16_epi8(W, U, A) \
+ ((__m512i)__builtin_ia32_selectw_512( \
+ (__mmask32)(U), (__v32hi)_mm512_ipcvtnebf16_epi8(A), \
+ (__v32hi)(__m512i)(W)))
+
+#define _mm512_maskz_ipcvtnebf16_epi8(U, A) \
+ ((__m512i)__builtin_ia32_selectw_512( \
+ (__mmask32)(U), (__v32hi)_mm512_ipcvtnebf16_epi8(A), \
+ (__v32hi)_mm512_setzero_si512()))
+
+#define _mm512_ipcvtnebf16_epu8(A) \
+ ((__m512i)__builtin_ia32_vcvtnebf162iubs512((__v32bf)(__m512bh)(A)))
+
+#define _mm512_mask_ipcvtnebf16_epu8(W, U, A) \
+ ((__m512i)__builtin_ia32_selectw_512( \
+ (__mmask32)(U), (__v32hi)_mm512_ipcvtnebf16_epu8(A), \
+ (__v32hi)(__m512i)(W)))
+
+#define _mm512_maskz_ipcvtnebf16_epu8(U, A) \
+ ((__m512i)__builtin_ia32_selectw_512( \
+ (__mmask32)(U), (__v32hi)_mm512_ipcvtnebf16_epu8(A), \
+ (__v32hi)_mm512_setzero_si512()))
+
+#define _mm512_ipcvttnebf16_epi8(A) \
+ ((__m512i)__builtin_ia32_vcvttnebf162ibs512((__v32bf)(__m512bh)(A)))
+
+#define _mm512_mask_ipcvttnebf16_epi8(W, U, A) \
+ ((__m512i)__builtin_ia32_selectw_512( \
+ (__mmask32)(U), (__v32hi)_mm512_ipcvttnebf16_epi8(A), \
+ (__v32hi)(__m512i)(W)))
+
+#define _mm512_maskz_ipcvttnebf16_epi8(U, A) \
+ ((__m512i)__builtin_ia32_selectw_512( \
+ (__mmask32)(U), (__v32hi)_mm512_ipcvttnebf16_epi8(A), \
+ (__v32hi)_mm512_setzero_si512()))
+
+#define _mm512_ipcvttnebf16_epu8(A) \
+ ((__m512i)__builtin_ia32_vcvttnebf162iubs512((__v32bf)(__m512bh)(A)))
+
+#define _mm512_mask_ipcvttnebf16_epu8(W, U, A) \
+ ((__m512i)__builtin_ia32_selectw_512( \
+ (__mmask32)(U), (__v32hi)_mm512_ipcvttnebf16_epu8(A), \
+ (__v32hi)(__m512i)(W)))
+
+#define _mm512_maskz_ipcvttnebf16_epu8(U, A) \
+ ((__m512i)__builtin_ia32_selectw_512( \
+ (__mmask32)(U), (__v32hi)_mm512_ipcvttnebf16_epu8(A), \
+ (__v32hi)_mm512_setzero_si512()))
+
+#define _mm512_ipcvtph_epi8(A) \
+ ((__m512i)__builtin_ia32_vcvtph2ibs512_mask( \
+ (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_ipcvtph_epi8(W, U, A) \
+ ((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A), \
+ (__v32hu)(W), (__mmask32)(U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_ipcvtph_epi8(U, A) \
+ ((__m512i)__builtin_ia32_vcvtph2ibs512_mask( \
+ (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_ipcvt_roundph_epi8(A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A), \
+ (__v32hu)_mm512_setzero_si512(), \
+ (__mmask32)-1, (const int)R))
+
+#define _mm512_mask_ipcvt_roundph_epi8(W, U, A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2ibs512_mask( \
+ (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), (const int)R))
+
+#define _mm512_maskz_ipcvt_roundph_epi8(U, A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A), \
+ (__v32hu)_mm512_setzero_si512(), \
+ (__mmask32)(U), (const int)R))
+
+#define _mm512_ipcvtph_epu8(A) \
+ ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \
+ (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_ipcvtph_epu8(W, U, A) \
+ ((__m512i)__builtin_ia32_vcvtph2iubs512_mask((__v32hf)(__m512h)(A), \
+ (__v32hu)(W), (__mmask32)(U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_ipcvtph_epu8(U, A) \
+ ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \
+ (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_ipcvt_roundph_epu8(A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \
+ (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \
+ (const int)R))
+
+#define _mm512_mask_ipcvt_roundph_epu8(W, U, A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \
+ (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), (const int)R))
+
+#define _mm512_maskz_ipcvt_roundph_epu8(U, A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \
+ (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \
+ (const int)R))
+
+#define _mm512_ipcvtps_epi8(A) \
+ ((__m512i)__builtin_ia32_vcvtps2ibs512_mask( \
+ (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_ipcvtps_epi8(W, U, A) \
+ ((__m512i)__builtin_ia32_vcvtps2ibs512_mask((__v16sf)(__m512)(A), \
+ (__v16su)(W), (__mmask16)(U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_ipcvtps_epi8(U, A) \
+ ((__m512i)__builtin_ia32_vcvtps2ibs512_mask( \
+ (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_ipcvt_roundps_epi8(A, R) \
+ ((__m512i)__builtin_ia32_vcvtps2ibs512_mask((__v16sf)(__m512)(A), \
+ (__v16su)_mm512_setzero_si512(), \
+ (__mmask16)-1, (const int)R))
+
+#define _mm512_mask_ipcvt_roundps_epi8(W, U, A, R) \
+ ((__m512i)__builtin_ia32_vcvtps2ibs512_mask( \
+ (__v16sf)(__m512)(A), (__v16su)(W), (__mmask16)(U), (const int)R))
+
+#define _mm512_maskz_ipcvt_roundps_epi8(U, A, R) \
+ ((__m512i)__builtin_ia32_vcvtps2ibs512_mask((__v16sf)(__m512)(A), \
+ (__v16su)_mm512_setzero_si512(), \
+ (__mmask16)(U), (const int)R))
+
+#define _mm512_ipcvtps_epu8(A) \
+ ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \
+ (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_ipcvtps_epu8(W, U, A) \
+ ((__m512i)__builtin_ia32_vcvtps2iubs512_mask((__v16sf)(__m512)(A), \
+ (__v16su)(W), (__mmask16)(U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_ipcvtps_epu8(U, A) \
+ ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \
+ (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_ipcvt_roundps_epu8(A, R) \
+ ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \
+ (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \
+ (const int)R))
+
+#define _mm512_mask_ipcvt_roundps_epu8(W, U, A, R) \
+ ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \
+ (__v16sf)(__m512)(A), (__v16su)(W), (__mmask16)(U), (const int)R))
+
+#define _mm512_maskz_ipcvt_roundps_epu8(U, A, R) \
+ ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \
+ (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \
+ (const int)R))
+
+#define _mm512_ipcvttnebf16_epi8(A) \
+ ((__m512i)__builtin_ia32_vcvttnebf162ibs512((__v32bf)(__m512bh)(A)))
+
+#define _mm512_mask_ipcvttnebf16_epi8(W, U, A) \
+ ((__m512i)__builtin_ia32_selectw_512( \
+ (__mmask32)(U), (__v32hi)_mm512_ipcvttnebf16_epi8(A), \
+ (__v32hi)(__m512i)(W)))
+
+#define _mm512_maskz_ipcvttnebf16_epi8(U, A) \
+ ((__m512i)__builtin_ia32_selectw_512( \
+ (__mmask32)(U), (__v32hi)_mm512_ipcvttnebf16_epi8(A), \
+ (__v32hi)_mm512_setzero_si512()))
+
+#define _mm512_ipcvttnebf16_epu8(A) \
+ ((__m512i)__builtin_ia32_vcvttnebf162iubs512((__v32bf)(__m512bh)(A)))
+
+#define _mm512_mask_ipcvttnebf16_epu8(W, U, A) \
+ ((__m512i)__builtin_ia32_selectw_512( \
+ (__mmask32)(U), (__v32hi)_mm512_ipcvttnebf16_epu8(A), \
+ (__v32hi)(__m512i)(W)))
+
+#define _mm512_maskz_ipcvttnebf16_epu8(U, A) \
+ ((__m512i)__builtin_ia32_selectw_512( \
+ (__mmask32)(U), (__v32hi)_mm512_ipcvttnebf16_epu8(A), \
+ (__v32hi)_mm512_setzero_si512()))
+
+#define _mm512_ipcvttph_epi8(A) \
+ ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \
+ (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_ipcvttph_epi8(W, U, A) \
+ ((__m512i)__builtin_ia32_vcvttph2ibs512_mask((__v32hf)(__m512h)(A), \
+ (__v32hu)(W), (__mmask32)(U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_ipcvttph_epi8(U, A) \
+ ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \
+ (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_ipcvtt_roundph_epi8(A, S) \
+ ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \
+ (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \
+ S))
+
+#define _mm512_mask_ipcvtt_roundph_epi8(W, U, A, S) \
+ ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \
+ (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), S))
+
+#define _mm512_maskz_ipcvtt_roundph_epi8(U, A, S) \
+ ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \
+ (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \
+ S))
+
+#define _mm512_ipcvttph_epu8(A) \
+ ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \
+ (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_ipcvttph_epu8(W, U, A) \
+ ((__m512i)__builtin_ia32_vcvttph2iubs512_mask((__v32hf)(__m512h)(A), \
+ (__v32hu)(W), (__mmask32)(U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_ipcvttph_epu8(U, A) \
+ ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \
+ (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_ipcvtt_roundph_epu8(A, S) \
+ ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \
+ (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \
+ S))
+
+#define _mm512_mask_ipcvtt_roundph_epu8(W, U, A, S) \
+ ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \
+ (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), S))
+
+#define _mm512_maskz_ipcvtt_roundph_epu8(U, A, S) \
+ ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \
+ (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \
+ S))
+
+#define _mm512_ipcvttps_epi8(A) \
+ ((__m512i)__builtin_ia32_vcvttps2ibs512_mask( \
+ (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_ipcvttps_epi8(W, U, A) \
+ ((__m512i)__builtin_ia32_vcvttps2ibs512_mask((__v16sf)(__m512h)(A), \
+ ...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM for the fold table change
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with one nit.
Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965