Skip to content

Conversation

Maetveis
Copy link
Contributor

@Maetveis Maetveis commented Jul 2, 2025

These extensions add char and long support to the subgroup builtins.
They are already supported by the Intel Graphics Compiler.

These extensions add char and long support to the subgroup builtins.
They are already supported by the Intel Graphics Compiler.

Co-authored-by: Victor Mustya <[email protected]>
@Maetveis Maetveis requested a review from michalpaszkowski July 2, 2025 09:09
@llvmbot llvmbot added the labels "clang" (Clang issues not falling into any other category), "backend:X86", and "clang:headers" (Headers provided by Clang, e.g. for intrinsics) on Jul 2, 2025
@llvmbot
Copy link
Member

llvmbot commented Jul 2, 2025

@llvm/pr-subscribers-backend-x86

@llvm/pr-subscribers-clang

Author: Mészáros Gergely (Maetveis)

Changes

These extensions add char and long support to the subgroup builtins.
They are already supported by the Intel Graphics Compiler.


Full diff: https://github.com/llvm/llvm-project/pull/146655.diff

1 File Affected:

  • (modified) clang/lib/Headers/opencl-c.h (+223-39)
diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h
index 8d8ef497cec49..d028c076c3fa9 100644
--- a/clang/lib/Headers/opencl-c.h
+++ b/clang/lib/Headers/opencl-c.h
@@ -17482,7 +17482,50 @@ double  __ovld __conv intel_sub_group_shuffle_up( double prev, double cur, uint
 double  __ovld __conv intel_sub_group_shuffle_xor( double, uint );
 #endif
 
-#endif //cl_intel_subgroups
+#if defined(cl_intel_subgroups_char) || defined(cl_intel_subgroups_short) ||   \
+    defined(cl_intel_subgroups_long)
+
+#if defined(__opencl_c_images)
+uint       __ovld __conv intel_sub_group_block_read_ui(read_only image2d_t, int2);
+uint2      __ovld __conv intel_sub_group_block_read_ui2(read_only image2d_t, int2);
+uint4      __ovld __conv intel_sub_group_block_read_ui4(read_only image2d_t, int2);
+uint8      __ovld __conv intel_sub_group_block_read_ui8(read_only image2d_t, int2);
+#endif // defined(__opencl_c_images)
+
+#if defined(__opencl_c_read_write_images)
+uint       __ovld __conv intel_sub_group_block_read_ui(read_write image2d_t, int2);
+uint2      __ovld __conv intel_sub_group_block_read_ui2(read_write image2d_t, int2);
+uint4      __ovld __conv intel_sub_group_block_read_ui4(read_write image2d_t, int2);
+uint8      __ovld __conv intel_sub_group_block_read_ui8(read_write image2d_t, int2);
+#endif // defined(__opencl_c_read_write_images)
+
+uint       __ovld __conv intel_sub_group_block_read_ui( const __global uint* p );
+uint2      __ovld __conv intel_sub_group_block_read_ui2( const __global uint* p );
+uint4      __ovld __conv intel_sub_group_block_read_ui4( const __global uint* p );
+uint8      __ovld __conv intel_sub_group_block_read_ui8( const __global uint* p );
+
+#if defined(__opencl_c_images)
+void       __ovld __conv intel_sub_group_block_write_ui(read_only image2d_t, int2, uint);
+void       __ovld __conv intel_sub_group_block_write_ui2(read_only image2d_t, int2, uint2);
+void       __ovld __conv intel_sub_group_block_write_ui4(read_only image2d_t, int2, uint4);
+void       __ovld __conv intel_sub_group_block_write_ui8(read_only image2d_t, int2, uint8);
+#endif // defined(__opencl_c_images)
+
+#if defined(__opencl_c_read_write_images)
+void       __ovld __conv intel_sub_group_block_write_ui(read_write image2d_t, int2, uint);
+void       __ovld __conv intel_sub_group_block_write_ui2(read_write image2d_t, int2, uint2);
+void       __ovld __conv intel_sub_group_block_write_ui4(read_write image2d_t, int2, uint4);
+void       __ovld __conv intel_sub_group_block_write_ui8(read_write image2d_t, int2, uint8);
+#endif // defined(__opencl_c_read_write_images)
+
+void       __ovld __conv intel_sub_group_block_write_ui( __global uint* p, uint data );
+void       __ovld __conv intel_sub_group_block_write_ui2( __global uint* p, uint2 data );
+void       __ovld __conv intel_sub_group_block_write_ui4( __global uint* p, uint4 data );
+void       __ovld __conv intel_sub_group_block_write_ui8( __global uint* p, uint8 data );
+
+#endif // defined(cl_intel_subgroups_char) || defined(cl_intel_subgroups_short) ||
+       // defined(cl_intel_subgroups_long)
+#endif // cl_intel_subgroups
 
 #if defined(cl_intel_subgroups_short)
 short       __ovld __conv intel_sub_group_broadcast( short , uint sub_group_local_id );
@@ -17574,44 +17617,6 @@ ushort      __ovld __conv intel_sub_group_scan_inclusive_min( ushort  x );
 short       __ovld __conv intel_sub_group_scan_inclusive_max( short   x );
 ushort      __ovld __conv intel_sub_group_scan_inclusive_max( ushort  x );
 
-#if defined(__opencl_c_images)
-uint       __ovld __conv intel_sub_group_block_read_ui(read_only image2d_t, int2);
-uint2      __ovld __conv intel_sub_group_block_read_ui2(read_only image2d_t, int2);
-uint4      __ovld __conv intel_sub_group_block_read_ui4(read_only image2d_t, int2);
-uint8      __ovld __conv intel_sub_group_block_read_ui8(read_only image2d_t, int2);
-#endif // defined(__opencl_c_images)
-
-#if defined(__opencl_c_read_write_images)
-uint       __ovld __conv intel_sub_group_block_read_ui(read_write image2d_t, int2);
-uint2      __ovld __conv intel_sub_group_block_read_ui2(read_write image2d_t, int2);
-uint4      __ovld __conv intel_sub_group_block_read_ui4(read_write image2d_t, int2);
-uint8      __ovld __conv intel_sub_group_block_read_ui8(read_write image2d_t, int2);
-#endif // defined(__opencl_c_read_write_images)
-
-uint       __ovld __conv intel_sub_group_block_read_ui( const __global uint* p );
-uint2      __ovld __conv intel_sub_group_block_read_ui2( const __global uint* p );
-uint4      __ovld __conv intel_sub_group_block_read_ui4( const __global uint* p );
-uint8      __ovld __conv intel_sub_group_block_read_ui8( const __global uint* p );
-
-#if defined(__opencl_c_images)
-void       __ovld __conv intel_sub_group_block_write_ui(read_only image2d_t, int2, uint);
-void       __ovld __conv intel_sub_group_block_write_ui2(read_only image2d_t, int2, uint2);
-void       __ovld __conv intel_sub_group_block_write_ui4(read_only image2d_t, int2, uint4);
-void       __ovld __conv intel_sub_group_block_write_ui8(read_only image2d_t, int2, uint8);
-#endif //defined(__opencl_c_images)
-
-#if defined(__opencl_c_read_write_images)
-void       __ovld __conv intel_sub_group_block_write_ui(read_write image2d_t, int2, uint);
-void       __ovld __conv intel_sub_group_block_write_ui2(read_write image2d_t, int2, uint2);
-void       __ovld __conv intel_sub_group_block_write_ui4(read_write image2d_t, int2, uint4);
-void       __ovld __conv intel_sub_group_block_write_ui8(read_write image2d_t, int2, uint8);
-#endif // defined(__opencl_c_read_write_images)
-
-void       __ovld __conv intel_sub_group_block_write_ui( __global uint* p, uint data );
-void       __ovld __conv intel_sub_group_block_write_ui2( __global uint* p, uint2 data );
-void       __ovld __conv intel_sub_group_block_write_ui4( __global uint* p, uint4 data );
-void       __ovld __conv intel_sub_group_block_write_ui8( __global uint* p, uint8 data );
-
 #if defined(__opencl_c_images)
 ushort      __ovld __conv intel_sub_group_block_read_us(read_only image2d_t, int2);
 ushort2     __ovld __conv intel_sub_group_block_read_us2(read_only image2d_t, int2);
@@ -17651,6 +17656,185 @@ void        __ovld __conv intel_sub_group_block_write_us4( __global ushort* p, u
 void        __ovld __conv intel_sub_group_block_write_us8( __global ushort* p, ushort8 data );
 #endif // cl_intel_subgroups_short
 
+#if defined(cl_intel_subgroups_char)
+char       __ovld __conv intel_sub_group_broadcast( char , uint sub_group_local_id );
+char2      __ovld __conv intel_sub_group_broadcast( char2, uint sub_group_local_id );
+char3      __ovld __conv intel_sub_group_broadcast( char3, uint sub_group_local_id );
+char4      __ovld __conv intel_sub_group_broadcast( char4, uint sub_group_local_id );
+char8      __ovld __conv intel_sub_group_broadcast( char8, uint sub_group_local_id );
+
+uchar      __ovld __conv intel_sub_group_broadcast( uchar , uint sub_group_local_id );
+uchar2     __ovld __conv intel_sub_group_broadcast( uchar2, uint sub_group_local_id );
+uchar3     __ovld __conv intel_sub_group_broadcast( uchar3, uint sub_group_local_id );
+uchar4     __ovld __conv intel_sub_group_broadcast( uchar4, uint sub_group_local_id );
+uchar8     __ovld __conv intel_sub_group_broadcast( uchar8, uint sub_group_local_id );
+
+char       __ovld __conv intel_sub_group_shuffle( char  , uint );
+char2      __ovld __conv intel_sub_group_shuffle( char2 , uint );
+char3      __ovld __conv intel_sub_group_shuffle( char3 , uint );
+char4      __ovld __conv intel_sub_group_shuffle( char4 , uint );
+char8      __ovld __conv intel_sub_group_shuffle( char8 , uint );
+char16     __ovld __conv intel_sub_group_shuffle( char16, uint);
+
+uchar      __ovld __conv intel_sub_group_shuffle( uchar  , uint );
+uchar2     __ovld __conv intel_sub_group_shuffle( uchar2 , uint );
+uchar3     __ovld __conv intel_sub_group_shuffle( uchar3 , uint );
+uchar4     __ovld __conv intel_sub_group_shuffle( uchar4 , uint );
+uchar8     __ovld __conv intel_sub_group_shuffle( uchar8 , uint );
+uchar16    __ovld __conv intel_sub_group_shuffle( uchar16, uint );
+
+char       __ovld __conv intel_sub_group_shuffle_down( char   cur, char   next, uint );
+char2      __ovld __conv intel_sub_group_shuffle_down( char2  cur, char2  next, uint );
+char3      __ovld __conv intel_sub_group_shuffle_down( char3  cur, char3  next, uint );
+char4      __ovld __conv intel_sub_group_shuffle_down( char4  cur, char4  next, uint );
+char8      __ovld __conv intel_sub_group_shuffle_down( char8  cur, char8  next, uint );
+char16     __ovld __conv intel_sub_group_shuffle_down( char16 cur, char16 next, uint );
+
+uchar      __ovld __conv intel_sub_group_shuffle_down( uchar   cur, uchar   next, uint );
+uchar2     __ovld __conv intel_sub_group_shuffle_down( uchar2  cur, uchar2  next, uint );
+uchar3     __ovld __conv intel_sub_group_shuffle_down( uchar3  cur, uchar3  next, uint );
+uchar4     __ovld __conv intel_sub_group_shuffle_down( uchar4  cur, uchar4  next, uint );
+uchar8     __ovld __conv intel_sub_group_shuffle_down( uchar8  cur, uchar8  next, uint );
+uchar16    __ovld __conv intel_sub_group_shuffle_down( uchar16 cur, uchar16 next, uint );
+
+char       __ovld __conv intel_sub_group_shuffle_up( char   cur, char   next, uint );
+char2      __ovld __conv intel_sub_group_shuffle_up( char2  cur, char2  next, uint );
+char3      __ovld __conv intel_sub_group_shuffle_up( char3  cur, char3  next, uint );
+char4      __ovld __conv intel_sub_group_shuffle_up( char4  cur, char4  next, uint );
+char8      __ovld __conv intel_sub_group_shuffle_up( char8  cur, char8  next, uint );
+char16     __ovld __conv intel_sub_group_shuffle_up( char16 cur, char16 next, uint );
+
+uchar      __ovld __conv intel_sub_group_shuffle_up( uchar   cur, uchar   next, uint );
+uchar2     __ovld __conv intel_sub_group_shuffle_up( uchar2  cur, uchar2  next, uint );
+uchar3     __ovld __conv intel_sub_group_shuffle_up( uchar3  cur, uchar3  next, uint );
+uchar4     __ovld __conv intel_sub_group_shuffle_up( uchar4  cur, uchar4  next, uint );
+uchar8     __ovld __conv intel_sub_group_shuffle_up( uchar8  cur, uchar8  next, uint );
+uchar16    __ovld __conv intel_sub_group_shuffle_up( uchar16 cur, uchar16 next, uint );
+
+char       __ovld __conv intel_sub_group_shuffle_xor( char  , uint );
+char2      __ovld __conv intel_sub_group_shuffle_xor( char2 , uint );
+char3      __ovld __conv intel_sub_group_shuffle_xor( char3 , uint );
+char4      __ovld __conv intel_sub_group_shuffle_xor( char4 , uint );
+char8      __ovld __conv intel_sub_group_shuffle_xor( char8 , uint );
+char16     __ovld __conv intel_sub_group_shuffle_xor( char16, uint );
+
+uchar      __ovld __conv intel_sub_group_shuffle_xor( uchar  , uint );
+uchar2     __ovld __conv intel_sub_group_shuffle_xor( uchar2 , uint );
+uchar3     __ovld __conv intel_sub_group_shuffle_xor( uchar3 , uint );
+uchar4     __ovld __conv intel_sub_group_shuffle_xor( uchar4 , uint );
+uchar8     __ovld __conv intel_sub_group_shuffle_xor( uchar8 , uint );
+uchar16    __ovld __conv intel_sub_group_shuffle_xor( uchar16, uint );
+
+char       __ovld __conv intel_sub_group_reduce_add( char   x );
+uchar      __ovld __conv intel_sub_group_reduce_add( uchar  x );
+char       __ovld __conv intel_sub_group_reduce_min( char   x );
+uchar      __ovld __conv intel_sub_group_reduce_min( uchar  x );
+char       __ovld __conv intel_sub_group_reduce_max( char   x );
+uchar      __ovld __conv intel_sub_group_reduce_max( uchar  x );
+
+char       __ovld __conv intel_sub_group_scan_exclusive_add( char   x );
+uchar      __ovld __conv intel_sub_group_scan_exclusive_add( uchar  x );
+char       __ovld __conv intel_sub_group_scan_exclusive_min( char   x );
+uchar      __ovld __conv intel_sub_group_scan_exclusive_min( uchar  x );
+char       __ovld __conv intel_sub_group_scan_exclusive_max( char   x );
+uchar      __ovld __conv intel_sub_group_scan_exclusive_max( uchar  x );
+
+char       __ovld __conv intel_sub_group_scan_inclusive_add( char   x );
+uchar      __ovld __conv intel_sub_group_scan_inclusive_add( uchar  x );
+char       __ovld __conv intel_sub_group_scan_inclusive_min( char   x );
+uchar      __ovld __conv intel_sub_group_scan_inclusive_min( uchar  x );
+char       __ovld __conv intel_sub_group_scan_inclusive_max( char   x );
+uchar      __ovld __conv intel_sub_group_scan_inclusive_max( uchar  x );
+
+#if defined(__opencl_c_images)
+uchar      __ovld __conv intel_sub_group_block_read_uc(read_only image2d_t, int2);
+uchar2     __ovld __conv intel_sub_group_block_read_uc2(read_only image2d_t, int2);
+uchar4     __ovld __conv intel_sub_group_block_read_uc4(read_only image2d_t, int2);
+uchar8     __ovld __conv intel_sub_group_block_read_uc8(read_only image2d_t, int2);
+uchar16    __ovld __conv intel_sub_group_block_read_uc16(read_only image2d_t, int2);
+#endif // defined(__opencl_c_images)
+
+#if defined(__opencl_c_read_write_images)
+uchar      __ovld __conv intel_sub_group_block_read_uc(read_write image2d_t, int2);
+uchar2     __ovld __conv intel_sub_group_block_read_uc2(read_write image2d_t, int2);
+uchar4     __ovld __conv intel_sub_group_block_read_uc4(read_write image2d_t, int2);
+uchar8     __ovld __conv intel_sub_group_block_read_uc8(read_write image2d_t, int2);
+uchar16    __ovld __conv intel_sub_group_block_read_uc16(read_write image2d_t, int2);
+#endif // defined(__opencl_c_read_write_images)
+
+uchar      __ovld __conv intel_sub_group_block_read_uc(  const __global uchar* p );
+uchar2     __ovld __conv intel_sub_group_block_read_uc2( const __global uchar* p );
+uchar4     __ovld __conv intel_sub_group_block_read_uc4( const __global uchar* p );
+uchar8     __ovld __conv intel_sub_group_block_read_uc8( const __global uchar* p );
+uchar16    __ovld __conv intel_sub_group_block_read_uc16( const __global uchar* p );
+
+#if defined(__opencl_c_images)
+void        __ovld __conv intel_sub_group_block_write_uc(write_only image2d_t, int2, uchar);
+void        __ovld __conv intel_sub_group_block_write_uc2(write_only image2d_t, int2, uchar2);
+void        __ovld __conv intel_sub_group_block_write_uc4(write_only image2d_t, int2, uchar4);
+void        __ovld __conv intel_sub_group_block_write_uc8(write_only image2d_t, int2, uchar8);
+void        __ovld __conv intel_sub_group_block_write_uc16(write_only image2d_t, int2, uchar16);
+#endif // defined(__opencl_c_images)
+
+#if defined(__opencl_c_read_write_images)
+void        __ovld __conv intel_sub_group_block_write_uc(read_write image2d_t, int2, uchar);
+void        __ovld __conv intel_sub_group_block_write_uc2(read_write image2d_t, int2, uchar2);
+void        __ovld __conv intel_sub_group_block_write_uc4(read_write image2d_t, int2, uchar4);
+void        __ovld __conv intel_sub_group_block_write_uc8(read_write image2d_t, int2, uchar8);
+void        __ovld __conv intel_sub_group_block_write_uc16(read_write image2d_t, int2, uchar16);
+#endif // defined(__opencl_c_read_write_images)
+
+void        __ovld __conv intel_sub_group_block_write_uc(  __global uchar* p, uchar  data );
+void        __ovld __conv intel_sub_group_block_write_uc2( __global uchar* p, uchar2 data );
+void        __ovld __conv intel_sub_group_block_write_uc4( __global uchar* p, uchar4 data );
+void        __ovld __conv intel_sub_group_block_write_uc8( __global uchar* p, uchar8 data );
+void        __ovld __conv intel_sub_group_block_write_uc16( __global uchar* p, uchar16 data );
+#endif // cl_intel_subgroups_char
+
+#if defined(cl_intel_subgroups_long)
+#if defined(__opencl_c_images)
+ulong      __ovld __conv intel_sub_group_block_read_ul(read_only image2d_t, int2);
+ulong2     __ovld __conv intel_sub_group_block_read_ul2(read_only image2d_t, int2);
+ulong4     __ovld __conv intel_sub_group_block_read_ul4(read_only image2d_t, int2);
+ulong8     __ovld __conv intel_sub_group_block_read_ul8(read_only image2d_t, int2);
+ulong16    __ovld __conv intel_sub_group_block_read_ul16(read_only image2d_t, int2);
+#endif // defined(__opencl_c_images)
+
+#if defined(__opencl_c_read_write_images)
+ulong      __ovld __conv intel_sub_group_block_read_ul(read_write image2d_t, int2);
+ulong2     __ovld __conv intel_sub_group_block_read_ul2(read_write image2d_t, int2);
+ulong4     __ovld __conv intel_sub_group_block_read_ul4(read_write image2d_t, int2);
+ulong8     __ovld __conv intel_sub_group_block_read_ul8(read_write image2d_t, int2);
+ulong16    __ovld __conv intel_sub_group_block_read_ul16(read_write image2d_t, int2);
+#endif // defined(__opencl_c_read_write_images)
+
+ulong      __ovld __conv intel_sub_group_block_read_ul(  const __global ulong* p );
+ulong2     __ovld __conv intel_sub_group_block_read_ul2( const __global ulong* p );
+ulong4     __ovld __conv intel_sub_group_block_read_ul4( const __global ulong* p );
+ulong8     __ovld __conv intel_sub_group_block_read_ul8( const __global ulong* p );
+
+#if defined(__opencl_c_images)
+void        __ovld __conv intel_sub_group_block_write_ul(write_only image2d_t, int2, ulong);
+void        __ovld __conv intel_sub_group_block_write_ul2(write_only image2d_t, int2, ulong2);
+void        __ovld __conv intel_sub_group_block_write_ul4(write_only image2d_t, int2, ulong4);
+void        __ovld __conv intel_sub_group_block_write_ul8(write_only image2d_t, int2, ulong8);
+void        __ovld __conv intel_sub_group_block_write_ul16(write_only image2d_t, int2, ulong16);
+#endif // defined(__opencl_c_images)
+
+#if defined(__opencl_c_read_write_images)
+void        __ovld __conv intel_sub_group_block_write_ul(read_write image2d_t, int2, ulong);
+void        __ovld __conv intel_sub_group_block_write_ul2(read_write image2d_t, int2, ulong2);
+void        __ovld __conv intel_sub_group_block_write_ul4(read_write image2d_t, int2, ulong4);
+void        __ovld __conv intel_sub_group_block_write_ul8(read_write image2d_t, int2, ulong8);
+void        __ovld __conv intel_sub_group_block_write_ul16(read_write image2d_t, int2, ulong16);
+#endif // defined(__opencl_c_read_write_images)
+
+void        __ovld __conv intel_sub_group_block_write_ul(  __global ulong* p, ulong  data );
+void        __ovld __conv intel_sub_group_block_write_ul2( __global ulong* p, ulong2 data );
+void        __ovld __conv intel_sub_group_block_write_ul4( __global ulong* p, ulong4 data );
+void        __ovld __conv intel_sub_group_block_write_ul8( __global ulong* p, ulong8 data);
+#endif // cl_intel_subgroups_long
+
 #ifdef cl_intel_device_side_avc_motion_estimation
 #pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : begin
 

@Maetveis Maetveis added the "OpenCL" label and removed the "clang" (Clang issues not falling into any other category) and "backend:X86" labels on Jul 2, 2025
Copy link

github-actions bot commented Jul 2, 2025

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:
git-clang-format --diff HEAD~1 HEAD --extensions h -- clang/lib/Headers/opencl-c.h
View the diff from clang-format here.
diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h
index d028c076c..35721f1af 100644
--- a/clang/lib/Headers/opencl-c.h
+++ b/clang/lib/Headers/opencl-c.h
@@ -17486,45 +17486,56 @@ double  __ovld __conv intel_sub_group_shuffle_xor( double, uint );
     defined(cl_intel_subgroups_long)
 
 #if defined(__opencl_c_images)
-uint       __ovld __conv intel_sub_group_block_read_ui(read_only image2d_t, int2);
-uint2      __ovld __conv intel_sub_group_block_read_ui2(read_only image2d_t, int2);
-uint4      __ovld __conv intel_sub_group_block_read_ui4(read_only image2d_t, int2);
-uint8      __ovld __conv intel_sub_group_block_read_ui8(read_only image2d_t, int2);
+uint __ovld __conv intel_sub_group_block_read_ui(read_only image2d_t, int2);
+uint2 __ovld __conv intel_sub_group_block_read_ui2(read_only image2d_t, int2);
+uint4 __ovld __conv intel_sub_group_block_read_ui4(read_only image2d_t, int2);
+uint8 __ovld __conv intel_sub_group_block_read_ui8(read_only image2d_t, int2);
 #endif // defined(__opencl_c_images)
 
 #if defined(__opencl_c_read_write_images)
-uint       __ovld __conv intel_sub_group_block_read_ui(read_write image2d_t, int2);
-uint2      __ovld __conv intel_sub_group_block_read_ui2(read_write image2d_t, int2);
-uint4      __ovld __conv intel_sub_group_block_read_ui4(read_write image2d_t, int2);
-uint8      __ovld __conv intel_sub_group_block_read_ui8(read_write image2d_t, int2);
+uint __ovld __conv intel_sub_group_block_read_ui(read_write image2d_t, int2);
+uint2 __ovld __conv intel_sub_group_block_read_ui2(read_write image2d_t, int2);
+uint4 __ovld __conv intel_sub_group_block_read_ui4(read_write image2d_t, int2);
+uint8 __ovld __conv intel_sub_group_block_read_ui8(read_write image2d_t, int2);
 #endif // defined(__opencl_c_read_write_images)
 
-uint       __ovld __conv intel_sub_group_block_read_ui( const __global uint* p );
-uint2      __ovld __conv intel_sub_group_block_read_ui2( const __global uint* p );
-uint4      __ovld __conv intel_sub_group_block_read_ui4( const __global uint* p );
-uint8      __ovld __conv intel_sub_group_block_read_ui8( const __global uint* p );
+uint __ovld __conv intel_sub_group_block_read_ui(const __global uint *p);
+uint2 __ovld __conv intel_sub_group_block_read_ui2(const __global uint *p);
+uint4 __ovld __conv intel_sub_group_block_read_ui4(const __global uint *p);
+uint8 __ovld __conv intel_sub_group_block_read_ui8(const __global uint *p);
 
 #if defined(__opencl_c_images)
-void       __ovld __conv intel_sub_group_block_write_ui(read_only image2d_t, int2, uint);
-void       __ovld __conv intel_sub_group_block_write_ui2(read_only image2d_t, int2, uint2);
-void       __ovld __conv intel_sub_group_block_write_ui4(read_only image2d_t, int2, uint4);
-void       __ovld __conv intel_sub_group_block_write_ui8(read_only image2d_t, int2, uint8);
+void __ovld __conv intel_sub_group_block_write_ui(read_only image2d_t, int2,
+                                                  uint);
+void __ovld __conv intel_sub_group_block_write_ui2(read_only image2d_t, int2,
+                                                   uint2);
+void __ovld __conv intel_sub_group_block_write_ui4(read_only image2d_t, int2,
+                                                   uint4);
+void __ovld __conv intel_sub_group_block_write_ui8(read_only image2d_t, int2,
+                                                   uint8);
 #endif // defined(__opencl_c_images)
 
 #if defined(__opencl_c_read_write_images)
-void       __ovld __conv intel_sub_group_block_write_ui(read_write image2d_t, int2, uint);
-void       __ovld __conv intel_sub_group_block_write_ui2(read_write image2d_t, int2, uint2);
-void       __ovld __conv intel_sub_group_block_write_ui4(read_write image2d_t, int2, uint4);
-void       __ovld __conv intel_sub_group_block_write_ui8(read_write image2d_t, int2, uint8);
+void __ovld __conv intel_sub_group_block_write_ui(read_write image2d_t, int2,
+                                                  uint);
+void __ovld __conv intel_sub_group_block_write_ui2(read_write image2d_t, int2,
+                                                   uint2);
+void __ovld __conv intel_sub_group_block_write_ui4(read_write image2d_t, int2,
+                                                   uint4);
+void __ovld __conv intel_sub_group_block_write_ui8(read_write image2d_t, int2,
+                                                   uint8);
 #endif // defined(__opencl_c_read_write_images)
 
-void       __ovld __conv intel_sub_group_block_write_ui( __global uint* p, uint data );
-void       __ovld __conv intel_sub_group_block_write_ui2( __global uint* p, uint2 data );
-void       __ovld __conv intel_sub_group_block_write_ui4( __global uint* p, uint4 data );
-void       __ovld __conv intel_sub_group_block_write_ui8( __global uint* p, uint8 data );
+void __ovld __conv intel_sub_group_block_write_ui(__global uint *p, uint data);
+void __ovld __conv intel_sub_group_block_write_ui2(__global uint *p,
+                                                   uint2 data);
+void __ovld __conv intel_sub_group_block_write_ui4(__global uint *p,
+                                                   uint4 data);
+void __ovld __conv intel_sub_group_block_write_ui8(__global uint *p,
+                                                   uint8 data);
 
-#endif // defined(cl_intel_subgroups_char) || defined(cl_intel_subgroups_short) ||
-       // defined(cl_intel_subgroups_long)
+#endif // defined(cl_intel_subgroups_char) || defined(cl_intel_subgroups_short)
+       // || defined(cl_intel_subgroups_long)
 #endif // cl_intel_subgroups
 
 #if defined(cl_intel_subgroups_short)
@@ -17657,182 +17668,222 @@ void        __ovld __conv intel_sub_group_block_write_us8( __global ushort* p, u
 #endif // cl_intel_subgroups_short
 
 #if defined(cl_intel_subgroups_char)
-char       __ovld __conv intel_sub_group_broadcast( char , uint sub_group_local_id );
-char2      __ovld __conv intel_sub_group_broadcast( char2, uint sub_group_local_id );
-char3      __ovld __conv intel_sub_group_broadcast( char3, uint sub_group_local_id );
-char4      __ovld __conv intel_sub_group_broadcast( char4, uint sub_group_local_id );
-char8      __ovld __conv intel_sub_group_broadcast( char8, uint sub_group_local_id );
-
-uchar      __ovld __conv intel_sub_group_broadcast( uchar , uint sub_group_local_id );
-uchar2     __ovld __conv intel_sub_group_broadcast( uchar2, uint sub_group_local_id );
-uchar3     __ovld __conv intel_sub_group_broadcast( uchar3, uint sub_group_local_id );
-uchar4     __ovld __conv intel_sub_group_broadcast( uchar4, uint sub_group_local_id );
-uchar8     __ovld __conv intel_sub_group_broadcast( uchar8, uint sub_group_local_id );
-
-char       __ovld __conv intel_sub_group_shuffle( char  , uint );
-char2      __ovld __conv intel_sub_group_shuffle( char2 , uint );
-char3      __ovld __conv intel_sub_group_shuffle( char3 , uint );
-char4      __ovld __conv intel_sub_group_shuffle( char4 , uint );
-char8      __ovld __conv intel_sub_group_shuffle( char8 , uint );
-char16     __ovld __conv intel_sub_group_shuffle( char16, uint);
-
-uchar      __ovld __conv intel_sub_group_shuffle( uchar  , uint );
-uchar2     __ovld __conv intel_sub_group_shuffle( uchar2 , uint );
-uchar3     __ovld __conv intel_sub_group_shuffle( uchar3 , uint );
-uchar4     __ovld __conv intel_sub_group_shuffle( uchar4 , uint );
-uchar8     __ovld __conv intel_sub_group_shuffle( uchar8 , uint );
-uchar16    __ovld __conv intel_sub_group_shuffle( uchar16, uint );
-
-char       __ovld __conv intel_sub_group_shuffle_down( char   cur, char   next, uint );
-char2      __ovld __conv intel_sub_group_shuffle_down( char2  cur, char2  next, uint );
-char3      __ovld __conv intel_sub_group_shuffle_down( char3  cur, char3  next, uint );
-char4      __ovld __conv intel_sub_group_shuffle_down( char4  cur, char4  next, uint );
-char8      __ovld __conv intel_sub_group_shuffle_down( char8  cur, char8  next, uint );
-char16     __ovld __conv intel_sub_group_shuffle_down( char16 cur, char16 next, uint );
-
-uchar      __ovld __conv intel_sub_group_shuffle_down( uchar   cur, uchar   next, uint );
-uchar2     __ovld __conv intel_sub_group_shuffle_down( uchar2  cur, uchar2  next, uint );
-uchar3     __ovld __conv intel_sub_group_shuffle_down( uchar3  cur, uchar3  next, uint );
-uchar4     __ovld __conv intel_sub_group_shuffle_down( uchar4  cur, uchar4  next, uint );
-uchar8     __ovld __conv intel_sub_group_shuffle_down( uchar8  cur, uchar8  next, uint );
-uchar16    __ovld __conv intel_sub_group_shuffle_down( uchar16 cur, uchar16 next, uint );
-
-char       __ovld __conv intel_sub_group_shuffle_up( char   cur, char   next, uint );
-char2      __ovld __conv intel_sub_group_shuffle_up( char2  cur, char2  next, uint );
-char3      __ovld __conv intel_sub_group_shuffle_up( char3  cur, char3  next, uint );
-char4      __ovld __conv intel_sub_group_shuffle_up( char4  cur, char4  next, uint );
-char8      __ovld __conv intel_sub_group_shuffle_up( char8  cur, char8  next, uint );
-char16     __ovld __conv intel_sub_group_shuffle_up( char16 cur, char16 next, uint );
-
-uchar      __ovld __conv intel_sub_group_shuffle_up( uchar   cur, uchar   next, uint );
-uchar2     __ovld __conv intel_sub_group_shuffle_up( uchar2  cur, uchar2  next, uint );
-uchar3     __ovld __conv intel_sub_group_shuffle_up( uchar3  cur, uchar3  next, uint );
-uchar4     __ovld __conv intel_sub_group_shuffle_up( uchar4  cur, uchar4  next, uint );
-uchar8     __ovld __conv intel_sub_group_shuffle_up( uchar8  cur, uchar8  next, uint );
-uchar16    __ovld __conv intel_sub_group_shuffle_up( uchar16 cur, uchar16 next, uint );
-
-char       __ovld __conv intel_sub_group_shuffle_xor( char  , uint );
-char2      __ovld __conv intel_sub_group_shuffle_xor( char2 , uint );
-char3      __ovld __conv intel_sub_group_shuffle_xor( char3 , uint );
-char4      __ovld __conv intel_sub_group_shuffle_xor( char4 , uint );
-char8      __ovld __conv intel_sub_group_shuffle_xor( char8 , uint );
-char16     __ovld __conv intel_sub_group_shuffle_xor( char16, uint );
-
-uchar      __ovld __conv intel_sub_group_shuffle_xor( uchar  , uint );
-uchar2     __ovld __conv intel_sub_group_shuffle_xor( uchar2 , uint );
-uchar3     __ovld __conv intel_sub_group_shuffle_xor( uchar3 , uint );
-uchar4     __ovld __conv intel_sub_group_shuffle_xor( uchar4 , uint );
-uchar8     __ovld __conv intel_sub_group_shuffle_xor( uchar8 , uint );
-uchar16    __ovld __conv intel_sub_group_shuffle_xor( uchar16, uint );
-
-char       __ovld __conv intel_sub_group_reduce_add( char   x );
-uchar      __ovld __conv intel_sub_group_reduce_add( uchar  x );
-char       __ovld __conv intel_sub_group_reduce_min( char   x );
-uchar      __ovld __conv intel_sub_group_reduce_min( uchar  x );
-char       __ovld __conv intel_sub_group_reduce_max( char   x );
-uchar      __ovld __conv intel_sub_group_reduce_max( uchar  x );
-
-char       __ovld __conv intel_sub_group_scan_exclusive_add( char   x );
-uchar      __ovld __conv intel_sub_group_scan_exclusive_add( uchar  x );
-char       __ovld __conv intel_sub_group_scan_exclusive_min( char   x );
-uchar      __ovld __conv intel_sub_group_scan_exclusive_min( uchar  x );
-char       __ovld __conv intel_sub_group_scan_exclusive_max( char   x );
-uchar      __ovld __conv intel_sub_group_scan_exclusive_max( uchar  x );
-
-char       __ovld __conv intel_sub_group_scan_inclusive_add( char   x );
-uchar      __ovld __conv intel_sub_group_scan_inclusive_add( uchar  x );
-char       __ovld __conv intel_sub_group_scan_inclusive_min( char   x );
-uchar      __ovld __conv intel_sub_group_scan_inclusive_min( uchar  x );
-char       __ovld __conv intel_sub_group_scan_inclusive_max( char   x );
-uchar      __ovld __conv intel_sub_group_scan_inclusive_max( uchar  x );
+char __ovld __conv intel_sub_group_broadcast(char, uint sub_group_local_id);
+char2 __ovld __conv intel_sub_group_broadcast(char2, uint sub_group_local_id);
+char3 __ovld __conv intel_sub_group_broadcast(char3, uint sub_group_local_id);
+char4 __ovld __conv intel_sub_group_broadcast(char4, uint sub_group_local_id);
+char8 __ovld __conv intel_sub_group_broadcast(char8, uint sub_group_local_id);
+
+uchar __ovld __conv intel_sub_group_broadcast(uchar, uint sub_group_local_id);
+uchar2 __ovld __conv intel_sub_group_broadcast(uchar2, uint sub_group_local_id);
+uchar3 __ovld __conv intel_sub_group_broadcast(uchar3, uint sub_group_local_id);
+uchar4 __ovld __conv intel_sub_group_broadcast(uchar4, uint sub_group_local_id);
+uchar8 __ovld __conv intel_sub_group_broadcast(uchar8, uint sub_group_local_id);
+
+char __ovld __conv intel_sub_group_shuffle(char, uint);
+char2 __ovld __conv intel_sub_group_shuffle(char2, uint);
+char3 __ovld __conv intel_sub_group_shuffle(char3, uint);
+char4 __ovld __conv intel_sub_group_shuffle(char4, uint);
+char8 __ovld __conv intel_sub_group_shuffle(char8, uint);
+char16 __ovld __conv intel_sub_group_shuffle(char16, uint);
+
+uchar __ovld __conv intel_sub_group_shuffle(uchar, uint);
+uchar2 __ovld __conv intel_sub_group_shuffle(uchar2, uint);
+uchar3 __ovld __conv intel_sub_group_shuffle(uchar3, uint);
+uchar4 __ovld __conv intel_sub_group_shuffle(uchar4, uint);
+uchar8 __ovld __conv intel_sub_group_shuffle(uchar8, uint);
+uchar16 __ovld __conv intel_sub_group_shuffle(uchar16, uint);
+
+char __ovld __conv intel_sub_group_shuffle_down(char cur, char next, uint);
+char2 __ovld __conv intel_sub_group_shuffle_down(char2 cur, char2 next, uint);
+char3 __ovld __conv intel_sub_group_shuffle_down(char3 cur, char3 next, uint);
+char4 __ovld __conv intel_sub_group_shuffle_down(char4 cur, char4 next, uint);
+char8 __ovld __conv intel_sub_group_shuffle_down(char8 cur, char8 next, uint);
+char16 __ovld __conv intel_sub_group_shuffle_down(char16 cur, char16 next,
+                                                  uint);
+
+uchar __ovld __conv intel_sub_group_shuffle_down(uchar cur, uchar next, uint);
+uchar2 __ovld __conv intel_sub_group_shuffle_down(uchar2 cur, uchar2 next,
+                                                  uint);
+uchar3 __ovld __conv intel_sub_group_shuffle_down(uchar3 cur, uchar3 next,
+                                                  uint);
+uchar4 __ovld __conv intel_sub_group_shuffle_down(uchar4 cur, uchar4 next,
+                                                  uint);
+uchar8 __ovld __conv intel_sub_group_shuffle_down(uchar8 cur, uchar8 next,
+                                                  uint);
+uchar16 __ovld __conv intel_sub_group_shuffle_down(uchar16 cur, uchar16 next,
+                                                   uint);
+
+char __ovld __conv intel_sub_group_shuffle_up(char cur, char next, uint);
+char2 __ovld __conv intel_sub_group_shuffle_up(char2 cur, char2 next, uint);
+char3 __ovld __conv intel_sub_group_shuffle_up(char3 cur, char3 next, uint);
+char4 __ovld __conv intel_sub_group_shuffle_up(char4 cur, char4 next, uint);
+char8 __ovld __conv intel_sub_group_shuffle_up(char8 cur, char8 next, uint);
+char16 __ovld __conv intel_sub_group_shuffle_up(char16 cur, char16 next, uint);
+
+uchar __ovld __conv intel_sub_group_shuffle_up(uchar cur, uchar next, uint);
+uchar2 __ovld __conv intel_sub_group_shuffle_up(uchar2 cur, uchar2 next, uint);
+uchar3 __ovld __conv intel_sub_group_shuffle_up(uchar3 cur, uchar3 next, uint);
+uchar4 __ovld __conv intel_sub_group_shuffle_up(uchar4 cur, uchar4 next, uint);
+uchar8 __ovld __conv intel_sub_group_shuffle_up(uchar8 cur, uchar8 next, uint);
+uchar16 __ovld __conv intel_sub_group_shuffle_up(uchar16 cur, uchar16 next,
+                                                 uint);
+
+char __ovld __conv intel_sub_group_shuffle_xor(char, uint);
+char2 __ovld __conv intel_sub_group_shuffle_xor(char2, uint);
+char3 __ovld __conv intel_sub_group_shuffle_xor(char3, uint);
+char4 __ovld __conv intel_sub_group_shuffle_xor(char4, uint);
+char8 __ovld __conv intel_sub_group_shuffle_xor(char8, uint);
+char16 __ovld __conv intel_sub_group_shuffle_xor(char16, uint);
+
+uchar __ovld __conv intel_sub_group_shuffle_xor(uchar, uint);
+uchar2 __ovld __conv intel_sub_group_shuffle_xor(uchar2, uint);
+uchar3 __ovld __conv intel_sub_group_shuffle_xor(uchar3, uint);
+uchar4 __ovld __conv intel_sub_group_shuffle_xor(uchar4, uint);
+uchar8 __ovld __conv intel_sub_group_shuffle_xor(uchar8, uint);
+uchar16 __ovld __conv intel_sub_group_shuffle_xor(uchar16, uint);
+
+char __ovld __conv intel_sub_group_reduce_add(char x);
+uchar __ovld __conv intel_sub_group_reduce_add(uchar x);
+char __ovld __conv intel_sub_group_reduce_min(char x);
+uchar __ovld __conv intel_sub_group_reduce_min(uchar x);
+char __ovld __conv intel_sub_group_reduce_max(char x);
+uchar __ovld __conv intel_sub_group_reduce_max(uchar x);
+
+char __ovld __conv intel_sub_group_scan_exclusive_add(char x);
+uchar __ovld __conv intel_sub_group_scan_exclusive_add(uchar x);
+char __ovld __conv intel_sub_group_scan_exclusive_min(char x);
+uchar __ovld __conv intel_sub_group_scan_exclusive_min(uchar x);
+char __ovld __conv intel_sub_group_scan_exclusive_max(char x);
+uchar __ovld __conv intel_sub_group_scan_exclusive_max(uchar x);
+
+char __ovld __conv intel_sub_group_scan_inclusive_add(char x);
+uchar __ovld __conv intel_sub_group_scan_inclusive_add(uchar x);
+char __ovld __conv intel_sub_group_scan_inclusive_min(char x);
+uchar __ovld __conv intel_sub_group_scan_inclusive_min(uchar x);
+char __ovld __conv intel_sub_group_scan_inclusive_max(char x);
+uchar __ovld __conv intel_sub_group_scan_inclusive_max(uchar x);
 
 #if defined(__opencl_c_images)
-uchar      __ovld __conv intel_sub_group_block_read_uc(read_only image2d_t, int2);
-uchar2     __ovld __conv intel_sub_group_block_read_uc2(read_only image2d_t, int2);
-uchar4     __ovld __conv intel_sub_group_block_read_uc4(read_only image2d_t, int2);
-uchar8     __ovld __conv intel_sub_group_block_read_uc8(read_only image2d_t, int2);
-uchar16    __ovld __conv intel_sub_group_block_read_uc16(read_only image2d_t, int2);
+uchar __ovld __conv intel_sub_group_block_read_uc(read_only image2d_t, int2);
+uchar2 __ovld __conv intel_sub_group_block_read_uc2(read_only image2d_t, int2);
+uchar4 __ovld __conv intel_sub_group_block_read_uc4(read_only image2d_t, int2);
+uchar8 __ovld __conv intel_sub_group_block_read_uc8(read_only image2d_t, int2);
+uchar16 __ovld __conv intel_sub_group_block_read_uc16(read_only image2d_t,
+                                                      int2);
 #endif // defined(__opencl_c_images)
 
 #if defined(__opencl_c_read_write_images)
-uchar      __ovld __conv intel_sub_group_block_read_uc(read_write image2d_t, int2);
-uchar2     __ovld __conv intel_sub_group_block_read_uc2(read_write image2d_t, int2);
-uchar4     __ovld __conv intel_sub_group_block_read_uc4(read_write image2d_t, int2);
-uchar8     __ovld __conv intel_sub_group_block_read_uc8(read_write image2d_t, int2);
-uchar16    __ovld __conv intel_sub_group_block_read_uc16(read_write image2d_t, int2);
+uchar __ovld __conv intel_sub_group_block_read_uc(read_write image2d_t, int2);
+uchar2 __ovld __conv intel_sub_group_block_read_uc2(read_write image2d_t, int2);
+uchar4 __ovld __conv intel_sub_group_block_read_uc4(read_write image2d_t, int2);
+uchar8 __ovld __conv intel_sub_group_block_read_uc8(read_write image2d_t, int2);
+uchar16 __ovld __conv intel_sub_group_block_read_uc16(read_write image2d_t,
+                                                      int2);
 #endif // defined(__opencl_c_read_write_images)
 
-uchar      __ovld __conv intel_sub_group_block_read_uc(  const __global uchar* p );
-uchar2     __ovld __conv intel_sub_group_block_read_uc2( const __global uchar* p );
-uchar4     __ovld __conv intel_sub_group_block_read_uc4( const __global uchar* p );
-uchar8     __ovld __conv intel_sub_group_block_read_uc8( const __global uchar* p );
-uchar16    __ovld __conv intel_sub_group_block_read_uc16( const __global uchar* p );
+uchar __ovld __conv intel_sub_group_block_read_uc(const __global uchar *p);
+uchar2 __ovld __conv intel_sub_group_block_read_uc2(const __global uchar *p);
+uchar4 __ovld __conv intel_sub_group_block_read_uc4(const __global uchar *p);
+uchar8 __ovld __conv intel_sub_group_block_read_uc8(const __global uchar *p);
+uchar16 __ovld __conv intel_sub_group_block_read_uc16(const __global uchar *p);
 
 #if defined(__opencl_c_images)
-void        __ovld __conv intel_sub_group_block_write_uc(write_only image2d_t, int2, uchar);
-void        __ovld __conv intel_sub_group_block_write_uc2(write_only image2d_t, int2, uchar2);
-void        __ovld __conv intel_sub_group_block_write_uc4(write_only image2d_t, int2, uchar4);
-void        __ovld __conv intel_sub_group_block_write_uc8(write_only image2d_t, int2, uchar8);
-void        __ovld __conv intel_sub_group_block_write_uc16(write_only image2d_t, int2, uchar16);
+void __ovld __conv intel_sub_group_block_write_uc(write_only image2d_t, int2,
+                                                  uchar);
+void __ovld __conv intel_sub_group_block_write_uc2(write_only image2d_t, int2,
+                                                   uchar2);
+void __ovld __conv intel_sub_group_block_write_uc4(write_only image2d_t, int2,
+                                                   uchar4);
+void __ovld __conv intel_sub_group_block_write_uc8(write_only image2d_t, int2,
+                                                   uchar8);
+void __ovld __conv intel_sub_group_block_write_uc16(write_only image2d_t, int2,
+                                                    uchar16);
 #endif // defined(__opencl_c_images)
 
 #if defined(__opencl_c_read_write_images)
-void        __ovld __conv intel_sub_group_block_write_uc(read_write image2d_t, int2, uchar);
-void        __ovld __conv intel_sub_group_block_write_uc2(read_write image2d_t, int2, uchar2);
-void        __ovld __conv intel_sub_group_block_write_uc4(read_write image2d_t, int2, uchar4);
-void        __ovld __conv intel_sub_group_block_write_uc8(read_write image2d_t, int2, uchar8);
-void        __ovld __conv intel_sub_group_block_write_uc16(read_write image2d_t, int2, uchar16);
+void __ovld __conv intel_sub_group_block_write_uc(read_write image2d_t, int2,
+                                                  uchar);
+void __ovld __conv intel_sub_group_block_write_uc2(read_write image2d_t, int2,
+                                                   uchar2);
+void __ovld __conv intel_sub_group_block_write_uc4(read_write image2d_t, int2,
+                                                   uchar4);
+void __ovld __conv intel_sub_group_block_write_uc8(read_write image2d_t, int2,
+                                                   uchar8);
+void __ovld __conv intel_sub_group_block_write_uc16(read_write image2d_t, int2,
+                                                    uchar16);
 #endif // defined(__opencl_c_read_write_images)
 
-void        __ovld __conv intel_sub_group_block_write_uc(  __global uchar* p, uchar  data );
-void        __ovld __conv intel_sub_group_block_write_uc2( __global uchar* p, uchar2 data );
-void        __ovld __conv intel_sub_group_block_write_uc4( __global uchar* p, uchar4 data );
-void        __ovld __conv intel_sub_group_block_write_uc8( __global uchar* p, uchar8 data );
-void        __ovld __conv intel_sub_group_block_write_uc16( __global uchar* p, uchar16 data );
+void __ovld __conv intel_sub_group_block_write_uc(__global uchar *p,
+                                                  uchar data);
+void __ovld __conv intel_sub_group_block_write_uc2(__global uchar *p,
+                                                   uchar2 data);
+void __ovld __conv intel_sub_group_block_write_uc4(__global uchar *p,
+                                                   uchar4 data);
+void __ovld __conv intel_sub_group_block_write_uc8(__global uchar *p,
+                                                   uchar8 data);
+void __ovld __conv intel_sub_group_block_write_uc16(__global uchar *p,
+                                                    uchar16 data);
 #endif // cl_intel_subgroups_char
 
 #if defined(cl_intel_subgroups_long)
 #if defined(__opencl_c_images)
-ulong      __ovld __conv intel_sub_group_block_read_ul(read_only image2d_t, int2);
-ulong2     __ovld __conv intel_sub_group_block_read_ul2(read_only image2d_t, int2);
-ulong4     __ovld __conv intel_sub_group_block_read_ul4(read_only image2d_t, int2);
-ulong8     __ovld __conv intel_sub_group_block_read_ul8(read_only image2d_t, int2);
-ulong16    __ovld __conv intel_sub_group_block_read_ul16(read_only image2d_t, int2);
+ulong __ovld __conv intel_sub_group_block_read_ul(read_only image2d_t, int2);
+ulong2 __ovld __conv intel_sub_group_block_read_ul2(read_only image2d_t, int2);
+ulong4 __ovld __conv intel_sub_group_block_read_ul4(read_only image2d_t, int2);
+ulong8 __ovld __conv intel_sub_group_block_read_ul8(read_only image2d_t, int2);
+ulong16 __ovld __conv intel_sub_group_block_read_ul16(read_only image2d_t,
+                                                      int2);
 #endif // defined(__opencl_c_images)
 
 #if defined(__opencl_c_read_write_images)
-ulong      __ovld __conv intel_sub_group_block_read_ul(read_write image2d_t, int2);
-ulong2     __ovld __conv intel_sub_group_block_read_ul2(read_write image2d_t, int2);
-ulong4     __ovld __conv intel_sub_group_block_read_ul4(read_write image2d_t, int2);
-ulong8     __ovld __conv intel_sub_group_block_read_ul8(read_write image2d_t, int2);
-ulong16    __ovld __conv intel_sub_group_block_read_ul16(read_write image2d_t, int2);
+ulong __ovld __conv intel_sub_group_block_read_ul(read_write image2d_t, int2);
+ulong2 __ovld __conv intel_sub_group_block_read_ul2(read_write image2d_t, int2);
+ulong4 __ovld __conv intel_sub_group_block_read_ul4(read_write image2d_t, int2);
+ulong8 __ovld __conv intel_sub_group_block_read_ul8(read_write image2d_t, int2);
+ulong16 __ovld __conv intel_sub_group_block_read_ul16(read_write image2d_t,
+                                                      int2);
 #endif // defined(__opencl_c_read_write_images)
 
-ulong      __ovld __conv intel_sub_group_block_read_ul(  const __global ulong* p );
-ulong2     __ovld __conv intel_sub_group_block_read_ul2( const __global ulong* p );
-ulong4     __ovld __conv intel_sub_group_block_read_ul4( const __global ulong* p );
-ulong8     __ovld __conv intel_sub_group_block_read_ul8( const __global ulong* p );
+ulong __ovld __conv intel_sub_group_block_read_ul(const __global ulong *p);
+ulong2 __ovld __conv intel_sub_group_block_read_ul2(const __global ulong *p);
+ulong4 __ovld __conv intel_sub_group_block_read_ul4(const __global ulong *p);
+ulong8 __ovld __conv intel_sub_group_block_read_ul8(const __global ulong *p);
 
 #if defined(__opencl_c_images)
-void        __ovld __conv intel_sub_group_block_write_ul(write_only image2d_t, int2, ulong);
-void        __ovld __conv intel_sub_group_block_write_ul2(write_only image2d_t, int2, ulong2);
-void        __ovld __conv intel_sub_group_block_write_ul4(write_only image2d_t, int2, ulong4);
-void        __ovld __conv intel_sub_group_block_write_ul8(write_only image2d_t, int2, ulong8);
-void        __ovld __conv intel_sub_group_block_write_ul16(write_only image2d_t, int2, ulong16);
+void __ovld __conv intel_sub_group_block_write_ul(write_only image2d_t, int2,
+                                                  ulong);
+void __ovld __conv intel_sub_group_block_write_ul2(write_only image2d_t, int2,
+                                                   ulong2);
+void __ovld __conv intel_sub_group_block_write_ul4(write_only image2d_t, int2,
+                                                   ulong4);
+void __ovld __conv intel_sub_group_block_write_ul8(write_only image2d_t, int2,
+                                                   ulong8);
+void __ovld __conv intel_sub_group_block_write_ul16(write_only image2d_t, int2,
+                                                    ulong16);
 #endif // defined(__opencl_c_images)
 
 #if defined(__opencl_c_read_write_images)
-void        __ovld __conv intel_sub_group_block_write_ul(read_write image2d_t, int2, ulong);
-void        __ovld __conv intel_sub_group_block_write_ul2(read_write image2d_t, int2, ulong2);
-void        __ovld __conv intel_sub_group_block_write_ul4(read_write image2d_t, int2, ulong4);
-void        __ovld __conv intel_sub_group_block_write_ul8(read_write image2d_t, int2, ulong8);
-void        __ovld __conv intel_sub_group_block_write_ul16(read_write image2d_t, int2, ulong16);
+void __ovld __conv intel_sub_group_block_write_ul(read_write image2d_t, int2,
+                                                  ulong);
+void __ovld __conv intel_sub_group_block_write_ul2(read_write image2d_t, int2,
+                                                   ulong2);
+void __ovld __conv intel_sub_group_block_write_ul4(read_write image2d_t, int2,
+                                                   ulong4);
+void __ovld __conv intel_sub_group_block_write_ul8(read_write image2d_t, int2,
+                                                   ulong8);
+void __ovld __conv intel_sub_group_block_write_ul16(read_write image2d_t, int2,
+                                                    ulong16);
 #endif // defined(__opencl_c_read_write_images)
 
-void        __ovld __conv intel_sub_group_block_write_ul(  __global ulong* p, ulong  data );
-void        __ovld __conv intel_sub_group_block_write_ul2( __global ulong* p, ulong2 data );
-void        __ovld __conv intel_sub_group_block_write_ul4( __global ulong* p, ulong4 data );
-void        __ovld __conv intel_sub_group_block_write_ul8( __global ulong* p, ulong8 data);
+void __ovld __conv intel_sub_group_block_write_ul(__global ulong *p,
+                                                  ulong data);
+void __ovld __conv intel_sub_group_block_write_ul2(__global ulong *p,
+                                                   ulong2 data);
+void __ovld __conv intel_sub_group_block_write_ul4(__global ulong *p,
+                                                   ulong4 data);
+void __ovld __conv intel_sub_group_block_write_ul8(__global ulong *p,
+                                                   ulong8 data);
 #endif // cl_intel_subgroups_long
 
 #ifdef cl_intel_device_side_avc_motion_estimation

@Maetveis
Copy link
Contributor Author

Maetveis commented Jul 2, 2025

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

This file is not currently formatted, so the changes should match its existing formatting style.

@Maetveis
Copy link
Contributor Author

Maetveis commented Jul 3, 2025

@michalpaszkowski can you review this, or do you know who might be comfortable reviewing it?

Copy link
Member

@michalpaszkowski michalpaszkowski left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM!

@Maetveis Maetveis merged commit ddcccc4 into llvm:main Jul 4, 2025
11 of 12 checks passed
@Maetveis Maetveis deleted the opencl-headers-implement-subgroups-char branch July 4, 2025 04:25
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
clang:headers Headers provided by Clang, e.g. for intrinsics OpenCL
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants