Skip to content

[X86][AVX10.2] Support AVX10.2-SATCVT new instructions. #101599

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Aug 6, 2024

Conversation

FreddyLeaf
Copy link
Contributor

@FreddyLeaf FreddyLeaf commented Aug 2, 2024

Copy link

github-actions bot commented Aug 2, 2024

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:
git-clang-format --diff 3d5cc7e1e632b74119af13824dabc346bd248c93 59df2576f81d5741271bc7329fade564b83d492d --extensions cpp,inc,c,h -- clang/lib/Headers/avx10_2_512satcvtintrin.h clang/lib/Headers/avx10_2satcvtintrin.h clang/test/CodeGen/X86/avx10_2_512satcvt-builtins-error.c clang/test/CodeGen/X86/avx10_2_512satcvt-builtins.c clang/test/CodeGen/X86/avx10_2satcvt-builtins.c clang/lib/Headers/immintrin.h clang/lib/Sema/SemaX86.cpp llvm/lib/Target/X86/X86ISelLowering.cpp llvm/lib/Target/X86/X86ISelLowering.h llvm/lib/Target/X86/X86IntrinsicsInfo.h llvm/test/TableGen/x86-fold-tables.inc
View the diff from clang-format here.
diff --git a/clang/lib/Headers/avx10_2_512satcvtintrin.h b/clang/lib/Headers/avx10_2_512satcvtintrin.h
index 0dadadb6a0..67b2d5a421 100644
--- a/clang/lib/Headers/avx10_2_512satcvtintrin.h
+++ b/clang/lib/Headers/avx10_2_512satcvtintrin.h
@@ -68,7 +68,7 @@
 
 #define _mm512_ipcvtph_epi8(A)                                                 \
   ((__m512i)__builtin_ia32_vcvtph2ibs512_mask(                                 \
-      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1,   \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
       _MM_FROUND_CUR_DIRECTION))
 
 #define _mm512_mask_ipcvtph_epi8(W, U, A)                                      \
@@ -84,7 +84,7 @@
 #define _mm512_ipcvt_roundph_epi8(A, R)                                        \
   ((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A),           \
                                               (__v32hu)_mm512_setzero_si512(), \
-                                              (__mmask32)-1, (const int)R))
+                                              (__mmask32) - 1, (const int)R))
 
 #define _mm512_mask_ipcvt_roundph_epi8(W, U, A, R)                             \
   ((__m512i)__builtin_ia32_vcvtph2ibs512_mask(                                 \
@@ -97,7 +97,7 @@
 
 #define _mm512_ipcvtph_epu8(A)                                                 \
   ((__m512i)__builtin_ia32_vcvtph2iubs512_mask(                                \
-      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1,   \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
       _MM_FROUND_CUR_DIRECTION))
 
 #define _mm512_mask_ipcvtph_epu8(W, U, A)                                      \
@@ -112,7 +112,7 @@
 
 #define _mm512_ipcvt_roundph_epu8(A, R)                                        \
   ((__m512i)__builtin_ia32_vcvtph2iubs512_mask(                                \
-      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1,   \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
       (const int)R))
 
 #define _mm512_mask_ipcvt_roundph_epu8(W, U, A, R)                             \
@@ -126,7 +126,7 @@
 
 #define _mm512_ipcvtps_epi8(A)                                                 \
   ((__m512i)__builtin_ia32_vcvtps2ibs512_mask(                                 \
-      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1,    \
+      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1,  \
       _MM_FROUND_CUR_DIRECTION))
 
 #define _mm512_mask_ipcvtps_epi8(W, U, A)                                      \
@@ -142,7 +142,7 @@
 #define _mm512_ipcvt_roundps_epi8(A, R)                                        \
   ((__m512i)__builtin_ia32_vcvtps2ibs512_mask((__v16sf)(__m512)(A),            \
                                               (__v16su)_mm512_setzero_si512(), \
-                                              (__mmask16)-1, (const int)R))
+                                              (__mmask16) - 1, (const int)R))
 
 #define _mm512_mask_ipcvt_roundps_epi8(W, U, A, R)                             \
   ((__m512i)__builtin_ia32_vcvtps2ibs512_mask(                                 \
@@ -155,7 +155,7 @@
 
 #define _mm512_ipcvtps_epu8(A)                                                 \
   ((__m512i)__builtin_ia32_vcvtps2iubs512_mask(                                \
-      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1,    \
+      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1,  \
       _MM_FROUND_CUR_DIRECTION))
 
 #define _mm512_mask_ipcvtps_epu8(W, U, A)                                      \
@@ -170,7 +170,7 @@
 
 #define _mm512_ipcvt_roundps_epu8(A, R)                                        \
   ((__m512i)__builtin_ia32_vcvtps2iubs512_mask(                                \
-      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1,    \
+      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1,  \
       (const int)R))
 
 #define _mm512_mask_ipcvt_roundps_epu8(W, U, A, R)                             \
@@ -184,7 +184,7 @@
 
 #define _mm512_ipcvttph_epi8(A)                                                \
   ((__m512i)__builtin_ia32_vcvttph2ibs512_mask(                                \
-      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1,   \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
       _MM_FROUND_CUR_DIRECTION))
 
 #define _mm512_mask_ipcvttph_epi8(W, U, A)                                     \
@@ -199,7 +199,7 @@
 
 #define _mm512_ipcvtt_roundph_epi8(A, S)                                       \
   ((__m512i)__builtin_ia32_vcvttph2ibs512_mask(                                \
-      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1,   \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
       S))
 
 #define _mm512_mask_ipcvtt_roundph_epi8(W, U, A, S)                            \
@@ -213,7 +213,7 @@
 
 #define _mm512_ipcvttph_epu8(A)                                                \
   ((__m512i)__builtin_ia32_vcvttph2iubs512_mask(                               \
-      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1,   \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
       _MM_FROUND_CUR_DIRECTION))
 
 #define _mm512_mask_ipcvttph_epu8(W, U, A)                                     \
@@ -228,7 +228,7 @@
 
 #define _mm512_ipcvtt_roundph_epu8(A, S)                                       \
   ((__m512i)__builtin_ia32_vcvttph2iubs512_mask(                               \
-      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1,   \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
       S))
 
 #define _mm512_mask_ipcvtt_roundph_epu8(W, U, A, S)                            \
@@ -242,7 +242,7 @@
 
 #define _mm512_ipcvttps_epi8(A)                                                \
   ((__m512i)__builtin_ia32_vcvttps2ibs512_mask(                                \
-      (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1,   \
+      (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \
       _MM_FROUND_CUR_DIRECTION))
 
 #define _mm512_mask_ipcvttps_epi8(W, U, A)                                     \
@@ -257,7 +257,7 @@
 
 #define _mm512_ipcvtt_roundps_epi8(A, S)                                       \
   ((__m512i)__builtin_ia32_vcvttps2ibs512_mask(                                \
-      (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1,   \
+      (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \
       S))
 
 #define _mm512_mask_ipcvtt_roundps_epi8(W, U, A, S)                            \
@@ -271,7 +271,7 @@
 
 #define _mm512_ipcvttps_epu8(A)                                                \
   ((__m512i)__builtin_ia32_vcvttps2iubs512_mask(                               \
-      (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1,   \
+      (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \
       _MM_FROUND_CUR_DIRECTION))
 
 #define _mm512_mask_ipcvttps_epu8(W, U, A)                                     \
@@ -286,7 +286,7 @@
 
 #define _mm512_ipcvtt_roundps_epu8(A, S)                                       \
   ((__m512i)__builtin_ia32_vcvttps2iubs512_mask(                               \
-      (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1,   \
+      (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \
       S))
 
 #define _mm512_mask_ipcvtt_roundps_epu8(W, U, A, S)                            \
diff --git a/clang/lib/Headers/avx10_2satcvtintrin.h b/clang/lib/Headers/avx10_2satcvtintrin.h
index dd5c44fdaf..878f1adf87 100644
--- a/clang/lib/Headers/avx10_2satcvtintrin.h
+++ b/clang/lib/Headers/avx10_2satcvtintrin.h
@@ -66,7 +66,7 @@
 
 #define _mm_ipcvtph_epi8(A)                                                    \
   ((__m128i)__builtin_ia32_vcvtph2ibs128_mask(                                 \
-      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))
+      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8) - 1))
 
 #define _mm_mask_ipcvtph_epi8(W, U, A)                                         \
   ((__m128i)__builtin_ia32_vcvtph2ibs128_mask((__v8hf)(__m128h)(A),            \
@@ -78,7 +78,7 @@
 
 #define _mm256_ipcvtph_epi8(A)                                                 \
   ((__m256i)__builtin_ia32_vcvtph2ibs256_mask(                                 \
-      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1,   \
+      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16) - 1, \
       _MM_FROUND_CUR_DIRECTION))
 
 #define _mm256_mask_ipcvtph_epi8(W, U, A)                                      \
@@ -94,7 +94,7 @@
 #define _mm256_ipcvt_roundph_epi8(A, R)                                        \
   ((__m256i)__builtin_ia32_vcvtph2ibs256_mask((__v16hf)(__m256h)(A),           \
                                               (__v16hu)_mm256_setzero_si256(), \
-                                              (__mmask16)-1, (const int)R))
+                                              (__mmask16) - 1, (const int)R))
 
 #define _mm256_mask_ipcvt_roundph_epi8(W, U, A, R)                             \
   ((__m256i)__builtin_ia32_vcvtph2ibs256_mask(                                 \
@@ -107,7 +107,7 @@
 
 #define _mm_ipcvtph_epu8(A)                                                    \
   ((__m128i)__builtin_ia32_vcvtph2iubs128_mask(                                \
-      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))
+      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8) - 1))
 
 #define _mm_mask_ipcvtph_epu8(W, U, A)                                         \
   ((__m128i)__builtin_ia32_vcvtph2iubs128_mask((__v8hf)(__m128h)(A),           \
@@ -119,7 +119,7 @@
 
 #define _mm256_ipcvtph_epu8(A)                                                 \
   ((__m256i)__builtin_ia32_vcvtph2iubs256_mask(                                \
-      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1,   \
+      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16) - 1, \
       _MM_FROUND_CUR_DIRECTION))
 
 #define _mm256_mask_ipcvtph_epu8(W, U, A)                                      \
@@ -134,7 +134,7 @@
 
 #define _mm256_ipcvt_roundph_epu8(A, R)                                        \
   ((__m256i)__builtin_ia32_vcvtph2iubs256_mask(                                \
-      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1,   \
+      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16) - 1, \
       (const int)R))
 
 #define _mm256_mask_ipcvt_roundph_epu8(W, U, A, R)                             \
@@ -148,7 +148,7 @@
 
 #define _mm_ipcvtps_epi8(A)                                                    \
   ((__m128i)__builtin_ia32_vcvtps2ibs128_mask(                                 \
-      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))
+      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8) - 1))
 
 #define _mm_mask_ipcvtps_epi8(W, U, A)                                         \
   ((__m128i)__builtin_ia32_vcvtps2ibs128_mask((__v4sf)(__m128)(A),             \
@@ -160,7 +160,7 @@
 
 #define _mm256_ipcvtps_epi8(A)                                                 \
   ((__m256i)__builtin_ia32_vcvtps2ibs256_mask(                                 \
-      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1,       \
+      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8) - 1,     \
       _MM_FROUND_CUR_DIRECTION))
 
 #define _mm256_mask_ipcvtps_epi8(W, U, A)                                      \
@@ -176,7 +176,7 @@
 #define _mm256_ipcvt_roundps_epi8(A, R)                                        \
   ((__m256i)__builtin_ia32_vcvtps2ibs256_mask((__v8sf)(__m256)(A),             \
                                               (__v8su)_mm256_setzero_si256(),  \
-                                              (__mmask8)-1, (const int)R))
+                                              (__mmask8) - 1, (const int)R))
 
 #define _mm256_mask_ipcvt_roundps_epi8(W, U, A, R)                             \
   ((__m256i)__builtin_ia32_vcvtps2ibs256_mask(                                 \
@@ -189,7 +189,7 @@
 
 #define _mm_ipcvtps_epu8(A)                                                    \
   ((__m128i)__builtin_ia32_vcvtps2iubs128_mask(                                \
-      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))
+      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8) - 1))
 
 #define _mm_mask_ipcvtps_epu8(W, U, A)                                         \
   ((__m128i)__builtin_ia32_vcvtps2iubs128_mask((__v4sf)(__m128)(A),            \
@@ -201,7 +201,7 @@
 
 #define _mm256_ipcvtps_epu8(A)                                                 \
   ((__m256i)__builtin_ia32_vcvtps2iubs256_mask(                                \
-      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1,       \
+      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8) - 1,     \
       _MM_FROUND_CUR_DIRECTION))
 
 #define _mm256_mask_ipcvtps_epu8(W, U, A)                                      \
@@ -217,7 +217,7 @@
 #define _mm256_ipcvt_roundps_epu8(A, R)                                        \
   ((__m256i)__builtin_ia32_vcvtps2iubs256_mask((__v8sf)(__m256)(A),            \
                                                (__v8su)_mm256_setzero_si256(), \
-                                               (__mmask8)-1, (const int)R))
+                                               (__mmask8) - 1, (const int)R))
 
 #define _mm256_mask_ipcvt_roundps_epu8(W, U, A, R)                             \
   ((__m256i)__builtin_ia32_vcvtps2iubs256_mask(                                \
@@ -280,7 +280,7 @@
 
 #define _mm_ipcvttph_epi8(A)                                                   \
   ((__m128i)__builtin_ia32_vcvttph2ibs128_mask(                                \
-      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))
+      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8) - 1))
 
 #define _mm_mask_ipcvttph_epi8(W, U, A)                                        \
   ((__m128i)__builtin_ia32_vcvttph2ibs128_mask((__v8hf)(__m128h)(A),           \
@@ -292,7 +292,7 @@
 
 #define _mm256_ipcvttph_epi8(A)                                                \
   ((__m256i)__builtin_ia32_vcvttph2ibs256_mask(                                \
-      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1,   \
+      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16) - 1, \
       _MM_FROUND_CUR_DIRECTION))
 
 #define _mm256_mask_ipcvttph_epi8(W, U, A)                                     \
@@ -307,7 +307,7 @@
 
 #define _mm256_ipcvtt_roundph_epi8(A, R)                                       \
   ((__m256i)__builtin_ia32_vcvttph2ibs256_mask(                                \
-      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1,   \
+      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16) - 1, \
       (const int)R))
 
 #define _mm256_mask_ipcvtt_roundph_epi8(W, U, A, R)                            \
@@ -321,7 +321,7 @@
 
 #define _mm_ipcvttph_epu8(A)                                                   \
   ((__m128i)__builtin_ia32_vcvttph2iubs128_mask(                               \
-      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))
+      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8) - 1))
 
 #define _mm_mask_ipcvttph_epu8(W, U, A)                                        \
   ((__m128i)__builtin_ia32_vcvttph2iubs128_mask((__v8hf)(__m128h)(A),          \
@@ -333,7 +333,7 @@
 
 #define _mm256_ipcvttph_epu8(A)                                                \
   ((__m256i)__builtin_ia32_vcvttph2iubs256_mask(                               \
-      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1,   \
+      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16) - 1, \
       _MM_FROUND_CUR_DIRECTION))
 
 #define _mm256_mask_ipcvttph_epu8(W, U, A)                                     \
@@ -348,7 +348,7 @@
 
 #define _mm256_ipcvtt_roundph_epu8(A, R)                                       \
   ((__m256i)__builtin_ia32_vcvttph2iubs256_mask(                               \
-      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1,   \
+      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16) - 1, \
       (const int)R))
 
 #define _mm256_mask_ipcvtt_roundph_epu8(W, U, A, R)                            \
@@ -362,7 +362,7 @@
 
 #define _mm_ipcvttps_epi8(A)                                                   \
   ((__m128i)__builtin_ia32_vcvttps2ibs128_mask(                                \
-      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))
+      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8) - 1))
 
 #define _mm_mask_ipcvttps_epi8(W, U, A)                                        \
   ((__m128i)__builtin_ia32_vcvttps2ibs128_mask((__v4sf)(__m128)(A),            \
@@ -374,7 +374,7 @@
 
 #define _mm256_ipcvttps_epi8(A)                                                \
   ((__m256i)__builtin_ia32_vcvttps2ibs256_mask(                                \
-      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1,       \
+      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8) - 1,     \
       _MM_FROUND_CUR_DIRECTION))
 
 #define _mm256_mask_ipcvttps_epi8(W, U, A)                                     \
@@ -390,7 +390,7 @@
 #define _mm256_ipcvtt_roundps_epi8(A, R)                                       \
   ((__m256i)__builtin_ia32_vcvttps2ibs256_mask((__v8sf)(__m256)(A),            \
                                                (__v8su)_mm256_setzero_si256(), \
-                                               (__mmask8)-1, (const int)R))
+                                               (__mmask8) - 1, (const int)R))
 
 #define _mm256_mask_ipcvtt_roundps_epi8(W, U, A, R)                            \
   ((__m256i)__builtin_ia32_vcvttps2ibs256_mask(                                \
@@ -403,7 +403,7 @@
 
 #define _mm_ipcvttps_epu8(A)                                                   \
   ((__m128i)__builtin_ia32_vcvttps2iubs128_mask(                               \
-      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))
+      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8) - 1))
 
 #define _mm_mask_ipcvttps_epu8(W, U, A)                                        \
   ((__m128i)__builtin_ia32_vcvttps2iubs128_mask((__v4sf)(__m128)(A),           \
@@ -415,7 +415,7 @@
 
 #define _mm256_ipcvttps_epu8(A)                                                \
   ((__m256i)__builtin_ia32_vcvttps2iubs256_mask(                               \
-      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1,       \
+      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8) - 1,     \
       _MM_FROUND_CUR_DIRECTION))
 
 #define _mm256_mask_ipcvttps_epu8(W, U, A)                                     \
@@ -430,7 +430,7 @@
 
 #define _mm256_ipcvtt_roundps_epu8(A, R)                                       \
   ((__m256i)__builtin_ia32_vcvttps2iubs256_mask(                               \
-      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1,       \
+      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8) - 1,     \
       (const int)R))
 
 #define _mm256_mask_ipcvtt_roundps_epu8(W, U, A, R)                            \

@FreddyLeaf FreddyLeaf marked this pull request as ready for review August 5, 2024 03:29
@FreddyLeaf FreddyLeaf requested a review from RKSimon August 5, 2024 03:29
@llvmbot llvmbot added the following labels on Aug 5, 2024: clang (Clang issues not falling into any other category), backend:X86, clang:frontend (Language frontend issues, e.g. anything involving "Sema"), clang:headers (Headers provided by Clang, e.g. for intrinsics), mc (Machine (object) code), llvm:ir
@FreddyLeaf FreddyLeaf requested a review from e-kud August 5, 2024 03:30
@llvmbot
Copy link
Member

llvmbot commented Aug 5, 2024

@llvm/pr-subscribers-mc
@llvm/pr-subscribers-clang

@llvm/pr-subscribers-llvm-ir

Author: Freddy Ye (FreddyLeaf)

Changes

Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965


Patch is 611.58 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/101599.diff

25 Files Affected:

  • (modified) clang/include/clang/Basic/BuiltinsX86.def (+38)
  • (modified) clang/lib/Headers/CMakeLists.txt (+2)
  • (added) clang/lib/Headers/avx10_2_512satcvtintrin.h (+327)
  • (added) clang/lib/Headers/avx10_2satcvtintrin.h (+448)
  • (modified) clang/lib/Headers/immintrin.h (+2)
  • (modified) clang/lib/Sema/SemaX86.cpp (+16)
  • (added) clang/test/CodeGen/X86/avx10_2_512satcvt-builtins-error.c (+198)
  • (added) clang/test/CodeGen/X86/avx10_2_512satcvt-builtins.c (+379)
  • (added) clang/test/CodeGen/X86/avx10_2satcvt-builtins.c (+603)
  • (modified) llvm/include/llvm/IR/IntrinsicsX86.td (+112)
  • (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+20)
  • (modified) llvm/lib/Target/X86/X86ISelLowering.h (+21)
  • (modified) llvm/lib/Target/X86/X86InstrAVX10.td (+170)
  • (modified) llvm/lib/Target/X86/X86InstrFragmentsSIMD.td (+82)
  • (modified) llvm/lib/Target/X86/X86InstrUtils.td (+3-3)
  • (modified) llvm/lib/Target/X86/X86IntrinsicsInfo.h (+72)
  • (added) llvm/test/CodeGen/X86/avx10_2_512satcvt-intrinsics.ll (+1003)
  • (added) llvm/test/CodeGen/X86/avx10_2satcvt-intrinsics.ll (+1618)
  • (added) llvm/test/MC/Disassembler/X86/avx10.2-satcvt-32.txt (+1363)
  • (added) llvm/test/MC/Disassembler/X86/avx10.2-satcvt-64.txt (+1363)
  • (added) llvm/test/MC/X86/avx10.2satcvt-32-att.s (+1362)
  • (added) llvm/test/MC/X86/avx10.2satcvt-32-intel.s (+1362)
  • (added) llvm/test/MC/X86/avx10.2satcvt-64-att.s (+1362)
  • (added) llvm/test/MC/X86/avx10.2satcvt-64-intel.s (+1362)
  • (modified) llvm/test/TableGen/x86-fold-tables.inc (+216)
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index c49b5c36da4fc..fb55057b8cbc3 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -2158,6 +2158,44 @@ TARGET_BUILTIN(__builtin_ia32_vminmaxps512_round_mask, "V16fV16fV16fIiV16fUsIi",
 TARGET_BUILTIN(__builtin_ia32_vminmaxsd_round_mask, "V2dV2dV2dIiV2dUcIi", "nV:128:", "avx10.2-256")
 TARGET_BUILTIN(__builtin_ia32_vminmaxsh_round_mask, "V8xV8xV8xIiV8xUcIi", "nV:128:", "avx10.2-256")
 TARGET_BUILTIN(__builtin_ia32_vminmaxss_round_mask, "V4fV4fV4fIiV4fUcIi", "nV:128:", "avx10.2-256")
+
+// AVX10.2 SATCVT
+TARGET_BUILTIN(__builtin_ia32_vcvtnebf162ibs128, "V8UsV8y", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtnebf162ibs256, "V16UsV16y", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtnebf162ibs512, "V32UsV32y", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvtnebf162iubs128, "V8UsV8y", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtnebf162iubs256, "V16UsV16y", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtnebf162iubs512, "V32UsV32y", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2ibs128_mask, "V8UsV8xV8UsUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2ibs256_mask, "V16UsV16xV16UsUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2ibs512_mask, "V32UsV32xV32UsUiIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2iubs128_mask, "V8UsV8xV8UsUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2iubs256_mask, "V16UsV16xV16UsUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2iubs512_mask, "V32UsV32xV32UsUiIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2ibs128_mask, "V4UiV4fV4UiUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2ibs256_mask, "V8UiV8fV8UiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2ibs512_mask, "V16UiV16fV16UiUsIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2iubs128_mask, "V4UiV4fV4UiUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2iubs256_mask, "V8UiV8fV8UiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2iubs512_mask, "V16UiV16fV16UiUsIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttnebf162ibs128, "V8UsV8y", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttnebf162ibs256, "V16UsV16y", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttnebf162ibs512, "V32UsV32y", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttnebf162iubs128, "V8UsV8y", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttnebf162iubs256, "V16UsV16y", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttnebf162iubs512, "V32UsV32y", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2ibs128_mask, "V8UsV8xV8UsUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2ibs256_mask, "V16UsV16xV16UsUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2ibs512_mask, "V32UsV32xV32UsUiIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2iubs128_mask, "V8UsV8xV8UsUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2iubs256_mask, "V16UsV16xV16UsUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2iubs512_mask, "V32UsV32xV32UsUiIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2ibs128_mask, "V4UiV4fV4UiUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2ibs256_mask, "V8UiV8fV8UiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2ibs512_mask, "V16UiV16fV16UiUsIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2iubs128_mask, "V4UiV4fV4UiUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2iubs256_mask, "V8UiV8fV8UiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2iubs512_mask, "V16UiV16fV16UiUsIi", "nV:512:", "avx10.2-512")
 #undef BUILTIN
 #undef TARGET_BUILTIN
 #undef TARGET_HEADER_BUILTIN
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index f3d19e38f8f2b..91e106427ba1d 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -149,7 +149,9 @@ set(x86_files
   amxintrin.h
   avx10_2_512minmaxintrin.h
   avx10_2_512niintrin.h
+  avx10_2_512satcvtintrin.h
   avx10_2minmaxintrin.h
+  avx10_2satcvtintrin.h
   avx10_2niintrin.h
   avx2intrin.h
   avx512bf16intrin.h
diff --git a/clang/lib/Headers/avx10_2_512satcvtintrin.h b/clang/lib/Headers/avx10_2_512satcvtintrin.h
new file mode 100644
index 0000000000000..0ea645bee22f9
--- /dev/null
+++ b/clang/lib/Headers/avx10_2_512satcvtintrin.h
@@ -0,0 +1,327 @@
+/*===------ avx10_2_512satcvtintrin.h - AVX10_2_512SATCVT intrinsics -------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error                                                                         \
+    "Never use <avx10_2_512satcvtintrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H
+
+#ifndef __AVX10_2_512SATCVTINTRIN_H
+#define __AVX10_2_512SATCVTINTRIN_H
+
+// bf16 -> epi8-named conversion of A through
+// __builtin_ia32_vcvtnebf162ibs512. The mask/maskz forms hand the converted
+// value to __builtin_ia32_selectw_512 together with U and either W or a zero
+// vector (presumably a per-16-bit-element select on U -- confirm against the
+// builtin's definition).
+#define _mm512_ipcvtnebf16_epi8(A)                                             \
+  ((__m512i)__builtin_ia32_vcvtnebf162ibs512((__v32bf)(__m512bh)(A)))
+
+#define _mm512_mask_ipcvtnebf16_epi8(W, U, A)                                  \
+  ((__m512i)__builtin_ia32_selectw_512(                                        \
+      (__mmask32)(U), (__v32hi)_mm512_ipcvtnebf16_epi8(A),                     \
+      (__v32hi)(__m512i)(W)))
+
+#define _mm512_maskz_ipcvtnebf16_epi8(U, A)                                    \
+  ((__m512i)__builtin_ia32_selectw_512(                                        \
+      (__mmask32)(U), (__v32hi)_mm512_ipcvtnebf16_epi8(A),                     \
+      (__v32hi)_mm512_setzero_si512()))
+
+// Same shape as the _epi8 family, but routed through
+// __builtin_ia32_vcvtnebf162iubs512. The mask/maskz forms pass U, the
+// converted value, and W or a zero vector to __builtin_ia32_selectw_512.
+#define _mm512_ipcvtnebf16_epu8(A)                                             \
+  ((__m512i)__builtin_ia32_vcvtnebf162iubs512((__v32bf)(__m512bh)(A)))
+
+#define _mm512_mask_ipcvtnebf16_epu8(W, U, A)                                  \
+  ((__m512i)__builtin_ia32_selectw_512(                                        \
+      (__mmask32)(U), (__v32hi)_mm512_ipcvtnebf16_epu8(A),                     \
+      (__v32hi)(__m512i)(W)))
+
+#define _mm512_maskz_ipcvtnebf16_epu8(U, A)                                    \
+  ((__m512i)__builtin_ia32_selectw_512(                                        \
+      (__mmask32)(U), (__v32hi)_mm512_ipcvtnebf16_epu8(A),                     \
+      (__v32hi)_mm512_setzero_si512()))
+
+// "tt" variant: converts A through __builtin_ia32_vcvttnebf162ibs512
+// (presumably the truncating form -- confirm against the ISA reference).
+// The mask/maskz forms pass U, the converted value, and W or a zero vector
+// to __builtin_ia32_selectw_512.
+#define _mm512_ipcvttnebf16_epi8(A)                                            \
+  ((__m512i)__builtin_ia32_vcvttnebf162ibs512((__v32bf)(__m512bh)(A)))
+
+#define _mm512_mask_ipcvttnebf16_epi8(W, U, A)                                 \
+  ((__m512i)__builtin_ia32_selectw_512(                                        \
+      (__mmask32)(U), (__v32hi)_mm512_ipcvttnebf16_epi8(A),                    \
+      (__v32hi)(__m512i)(W)))
+
+#define _mm512_maskz_ipcvttnebf16_epi8(U, A)                                   \
+  ((__m512i)__builtin_ia32_selectw_512(                                        \
+      (__mmask32)(U), (__v32hi)_mm512_ipcvttnebf16_epi8(A),                    \
+      (__v32hi)_mm512_setzero_si512()))
+
+// "tt" variant routed through __builtin_ia32_vcvttnebf162iubs512. The
+// mask/maskz forms pass U, the converted value, and W or a zero vector to
+// __builtin_ia32_selectw_512.
+#define _mm512_ipcvttnebf16_epu8(A)                                            \
+  ((__m512i)__builtin_ia32_vcvttnebf162iubs512((__v32bf)(__m512bh)(A)))
+
+#define _mm512_mask_ipcvttnebf16_epu8(W, U, A)                                 \
+  ((__m512i)__builtin_ia32_selectw_512(                                        \
+      (__mmask32)(U), (__v32hi)_mm512_ipcvttnebf16_epu8(A),                    \
+      (__v32hi)(__m512i)(W)))
+
+#define _mm512_maskz_ipcvttnebf16_epu8(U, A)                                   \
+  ((__m512i)__builtin_ia32_selectw_512(                                        \
+      (__mmask32)(U), (__v32hi)_mm512_ipcvttnebf16_epu8(A),                    \
+      (__v32hi)_mm512_setzero_si512()))
+
+// Wrappers over __builtin_ia32_vcvtph2ibs512_mask with
+// _MM_FROUND_CUR_DIRECTION as the last argument.
+//   plain: zero vector as second operand, (__mmask32)-1 as mask
+//   mask : W as second operand, U as mask
+//   maskz: zero vector as second operand, U as mask
+#define _mm512_ipcvtph_epi8(A)                                                 \
+  ((__m512i)__builtin_ia32_vcvtph2ibs512_mask(                                 \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1,   \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_ipcvtph_epi8(W, U, A)                                      \
+  ((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A),           \
+                                              (__v32hu)(W), (__mmask32)(U),    \
+                                              _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_ipcvtph_epi8(U, A)                                        \
+  ((__m512i)__builtin_ia32_vcvtph2ibs512_mask(                                 \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U),  \
+      _MM_FROUND_CUR_DIRECTION))
+
+// Explicit-rounding wrappers over __builtin_ia32_vcvtph2ibs512_mask: R is
+// forwarded as the builtin's last argument. R is parenthesized before the
+// cast so that an expression argument (e.g. a | of rounding flags) is cast
+// as a whole rather than only its first operand.
+//   plain: zero vector as second operand, (__mmask32)-1 as mask
+//   mask : W as second operand, U as mask
+//   maskz: zero vector as second operand, U as mask
+#define _mm512_ipcvt_roundph_epi8(A, R)                                        \
+  ((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A),           \
+                                              (__v32hu)_mm512_setzero_si512(), \
+                                              (__mmask32)-1, (const int)(R)))
+
+#define _mm512_mask_ipcvt_roundph_epi8(W, U, A, R)                             \
+  ((__m512i)__builtin_ia32_vcvtph2ibs512_mask(                                 \
+      (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), (const int)(R)))
+
+#define _mm512_maskz_ipcvt_roundph_epi8(U, A, R)                               \
+  ((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A),           \
+                                              (__v32hu)_mm512_setzero_si512(), \
+                                              (__mmask32)(U), (const int)(R)))
+
+// Wrappers over __builtin_ia32_vcvtph2iubs512_mask with
+// _MM_FROUND_CUR_DIRECTION as the last argument.
+//   plain: zero vector as second operand, (__mmask32)-1 as mask
+//   mask : W as second operand, U as mask
+//   maskz: zero vector as second operand, U as mask
+#define _mm512_ipcvtph_epu8(A)                                                 \
+  ((__m512i)__builtin_ia32_vcvtph2iubs512_mask(                                \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1,   \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_ipcvtph_epu8(W, U, A)                                      \
+  ((__m512i)__builtin_ia32_vcvtph2iubs512_mask((__v32hf)(__m512h)(A),          \
+                                               (__v32hu)(W), (__mmask32)(U),   \
+                                               _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_ipcvtph_epu8(U, A)                                        \
+  ((__m512i)__builtin_ia32_vcvtph2iubs512_mask(                                \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U),  \
+      _MM_FROUND_CUR_DIRECTION))
+
+// Explicit-rounding wrappers over __builtin_ia32_vcvtph2iubs512_mask: R is
+// forwarded as the builtin's last argument. R is parenthesized before the
+// cast so an expression argument is cast as a whole.
+//   plain: zero vector as second operand, (__mmask32)-1 as mask
+//   mask : W as second operand, U as mask
+//   maskz: zero vector as second operand, U as mask
+#define _mm512_ipcvt_roundph_epu8(A, R)                                        \
+  ((__m512i)__builtin_ia32_vcvtph2iubs512_mask(                                \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1,   \
+      (const int)(R)))
+
+#define _mm512_mask_ipcvt_roundph_epu8(W, U, A, R)                             \
+  ((__m512i)__builtin_ia32_vcvtph2iubs512_mask(                                \
+      (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), (const int)(R)))
+
+#define _mm512_maskz_ipcvt_roundph_epu8(U, A, R)                               \
+  ((__m512i)__builtin_ia32_vcvtph2iubs512_mask(                                \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U),  \
+      (const int)(R)))
+
+// Wrappers over __builtin_ia32_vcvtps2ibs512_mask (A viewed as __v16sf) with
+// _MM_FROUND_CUR_DIRECTION as the last argument.
+//   plain: zero vector as second operand, (__mmask16)-1 as mask
+//   mask : W as second operand, U as mask
+//   maskz: zero vector as second operand, U as mask
+#define _mm512_ipcvtps_epi8(A)                                                 \
+  ((__m512i)__builtin_ia32_vcvtps2ibs512_mask(                                 \
+      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1,    \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_ipcvtps_epi8(W, U, A)                                      \
+  ((__m512i)__builtin_ia32_vcvtps2ibs512_mask((__v16sf)(__m512)(A),            \
+                                              (__v16su)(W), (__mmask16)(U),    \
+                                              _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_ipcvtps_epi8(U, A)                                        \
+  ((__m512i)__builtin_ia32_vcvtps2ibs512_mask(                                 \
+      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U),   \
+      _MM_FROUND_CUR_DIRECTION))
+
+// Explicit-rounding wrappers over __builtin_ia32_vcvtps2ibs512_mask: R is
+// forwarded as the builtin's last argument. R is parenthesized before the
+// cast so an expression argument is cast as a whole.
+//   plain: zero vector as second operand, (__mmask16)-1 as mask
+//   mask : W as second operand, U as mask
+//   maskz: zero vector as second operand, U as mask
+#define _mm512_ipcvt_roundps_epi8(A, R)                                        \
+  ((__m512i)__builtin_ia32_vcvtps2ibs512_mask((__v16sf)(__m512)(A),            \
+                                              (__v16su)_mm512_setzero_si512(), \
+                                              (__mmask16)-1, (const int)(R)))
+
+#define _mm512_mask_ipcvt_roundps_epi8(W, U, A, R)                             \
+  ((__m512i)__builtin_ia32_vcvtps2ibs512_mask(                                 \
+      (__v16sf)(__m512)(A), (__v16su)(W), (__mmask16)(U), (const int)(R)))
+
+#define _mm512_maskz_ipcvt_roundps_epi8(U, A, R)                               \
+  ((__m512i)__builtin_ia32_vcvtps2ibs512_mask((__v16sf)(__m512)(A),            \
+                                              (__v16su)_mm512_setzero_si512(), \
+                                              (__mmask16)(U), (const int)(R)))
+
+// Wrappers over __builtin_ia32_vcvtps2iubs512_mask (A viewed as __v16sf)
+// with _MM_FROUND_CUR_DIRECTION as the last argument.
+//   plain: zero vector as second operand, (__mmask16)-1 as mask
+//   mask : W as second operand, U as mask
+//   maskz: zero vector as second operand, U as mask
+#define _mm512_ipcvtps_epu8(A)                                                 \
+  ((__m512i)__builtin_ia32_vcvtps2iubs512_mask(                                \
+      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1,    \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_ipcvtps_epu8(W, U, A)                                      \
+  ((__m512i)__builtin_ia32_vcvtps2iubs512_mask((__v16sf)(__m512)(A),           \
+                                               (__v16su)(W), (__mmask16)(U),   \
+                                               _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_ipcvtps_epu8(U, A)                                        \
+  ((__m512i)__builtin_ia32_vcvtps2iubs512_mask(                                \
+      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U),   \
+      _MM_FROUND_CUR_DIRECTION))
+
+// Explicit-rounding wrappers over __builtin_ia32_vcvtps2iubs512_mask: R is
+// forwarded as the builtin's last argument. R is parenthesized before the
+// cast so an expression argument is cast as a whole.
+//   plain: zero vector as second operand, (__mmask16)-1 as mask
+//   mask : W as second operand, U as mask
+//   maskz: zero vector as second operand, U as mask
+#define _mm512_ipcvt_roundps_epu8(A, R)                                        \
+  ((__m512i)__builtin_ia32_vcvtps2iubs512_mask(                                \
+      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1,    \
+      (const int)(R)))
+
+#define _mm512_mask_ipcvt_roundps_epu8(W, U, A, R)                             \
+  ((__m512i)__builtin_ia32_vcvtps2iubs512_mask(                                \
+      (__v16sf)(__m512)(A), (__v16su)(W), (__mmask16)(U), (const int)(R)))
+
+#define _mm512_maskz_ipcvt_roundps_epu8(U, A, R)                               \
+  ((__m512i)__builtin_ia32_vcvtps2iubs512_mask(                                \
+      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U),   \
+      (const int)(R)))
+
+// _mm512_[mask_|maskz_]ipcvttnebf16_epi8 and
+// _mm512_[mask_|maskz_]ipcvttnebf16_epu8 are defined once, earlier in this
+// header; the token-identical second set of definitions is not repeated here.
+
+// Wrappers over __builtin_ia32_vcvttph2ibs512_mask with
+// _MM_FROUND_CUR_DIRECTION as the last argument.
+//   plain: zero vector as second operand, (__mmask32)-1 as mask
+//   mask : W as second operand, U as mask
+//   maskz: zero vector as second operand, U as mask
+#define _mm512_ipcvttph_epi8(A)                                                \
+  ((__m512i)__builtin_ia32_vcvttph2ibs512_mask(                                \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1,   \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_ipcvttph_epi8(W, U, A)                                     \
+  ((__m512i)__builtin_ia32_vcvttph2ibs512_mask((__v32hf)(__m512h)(A),          \
+                                               (__v32hu)(W), (__mmask32)(U),   \
+                                               _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_ipcvttph_epi8(U, A)                                       \
+  ((__m512i)__builtin_ia32_vcvttph2ibs512_mask(                                \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U),  \
+      _MM_FROUND_CUR_DIRECTION))
+
+// Wrappers over __builtin_ia32_vcvttph2ibs512_mask that forward the
+// caller-supplied S as the builtin's last argument. S is parenthesized and
+// cast to const int, matching how the non-"tt" _round macros in this header
+// forward their R argument.
+//   plain: zero vector as second operand, (__mmask32)-1 as mask
+//   mask : W as second operand, U as mask
+//   maskz: zero vector as second operand, U as mask
+#define _mm512_ipcvtt_roundph_epi8(A, S)                                       \
+  ((__m512i)__builtin_ia32_vcvttph2ibs512_mask(                                \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1,   \
+      (const int)(S)))
+
+#define _mm512_mask_ipcvtt_roundph_epi8(W, U, A, S)                            \
+  ((__m512i)__builtin_ia32_vcvttph2ibs512_mask(                                \
+      (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), (const int)(S)))
+
+#define _mm512_maskz_ipcvtt_roundph_epi8(U, A, S)                              \
+  ((__m512i)__builtin_ia32_vcvttph2ibs512_mask(                                \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U),  \
+      (const int)(S)))
+
+// Wrappers over __builtin_ia32_vcvttph2iubs512_mask with
+// _MM_FROUND_CUR_DIRECTION as the last argument.
+//   plain: zero vector as second operand, (__mmask32)-1 as mask
+//   mask : W as second operand, U as mask
+//   maskz: zero vector as second operand, U as mask
+#define _mm512_ipcvttph_epu8(A)                                                \
+  ((__m512i)__builtin_ia32_vcvttph2iubs512_mask(                               \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1,   \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_ipcvttph_epu8(W, U, A)                                     \
+  ((__m512i)__builtin_ia32_vcvttph2iubs512_mask((__v32hf)(__m512h)(A),         \
+                                                (__v32hu)(W), (__mmask32)(U),  \
+                                                _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_ipcvttph_epu8(U, A)                                       \
+  ((__m512i)__builtin_ia32_vcvttph2iubs512_mask(                               \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U),  \
+      _MM_FROUND_CUR_DIRECTION))
+
+// Wrappers over __builtin_ia32_vcvttph2iubs512_mask that forward the
+// caller-supplied S as the builtin's last argument. S is parenthesized and
+// cast to const int, matching how the non-"tt" _round macros in this header
+// forward their R argument.
+//   plain: zero vector as second operand, (__mmask32)-1 as mask
+//   mask : W as second operand, U as mask
+//   maskz: zero vector as second operand, U as mask
+#define _mm512_ipcvtt_roundph_epu8(A, S)                                       \
+  ((__m512i)__builtin_ia32_vcvttph2iubs512_mask(                               \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1,   \
+      (const int)(S)))
+
+#define _mm512_mask_ipcvtt_roundph_epu8(W, U, A, S)                            \
+  ((__m512i)__builtin_ia32_vcvttph2iubs512_mask(                               \
+      (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), (const int)(S)))
+
+#define _mm512_maskz_ipcvtt_roundph_epu8(U, A, S)                              \
+  ((__m512i)__builtin_ia32_vcvttph2iubs512_mask(                               \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U),  \
+      (const int)(S)))
+
+// Unmasked wrapper over __builtin_ia32_vcvttps2ibs512_mask: zero vector as
+// second operand, (__mmask16)-1 as mask, _MM_FROUND_CUR_DIRECTION as last
+// argument. A is a packed-single vector, so it is cast through __m512 (as in
+// _mm512_ipcvtps_epi8) rather than the half-precision type __m512h.
+#define _mm512_ipcvttps_epi8(A)                                                \
+  ((__m512i)__builtin_ia32_vcvttps2ibs512_mask(                                \
+      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1,    \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_ipcvttps_epi8(W, U, A)                                     \
+  ((__m512i)__builtin_ia32_vcvttps2ibs512_mask((__v16sf)(__m512h)(A),          \
+                 ...
[truncated]

Copy link
Contributor

@KanRobert KanRobert left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM for the fold table change

Copy link
Contributor

@phoebewang phoebewang left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM with one nit.

@FreddyLeaf FreddyLeaf merged commit 80721e0 into llvm:main Aug 6, 2024
6 of 7 checks passed
@FreddyLeaf FreddyLeaf deleted the avx10-satcvt branch August 6, 2024 11:37
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:X86 clang:frontend Language frontend issues, e.g. anything involving "Sema" clang:headers Headers provided by Clang, e.g. for intrinsics clang Clang issues not falling into any other category llvm:ir mc Machine (object) code
Projects
None yet
Development

Successfully merging this pull request may close these issues.

5 participants