Skip to content
1 change: 0 additions & 1 deletion clang/include/clang/Basic/Attr.td
Original file line number Diff line number Diff line change
Expand Up @@ -1299,7 +1299,6 @@ def LoopUnrollHint : InheritableAttr {

def IntelReqdSubGroupSize: InheritableAttr {
let Spellings = [GNU<"intel_reqd_sub_group_size">,
CXX11<"cl", "intel_reqd_sub_group_size">,
CXX11<"intel", "reqd_sub_group_size">];
let Args = [ExprArgument<"SubGroupSize">];
let Subjects = SubjectList<[Function, CXXMethod], ErrorDiag>;
Expand Down
20 changes: 3 additions & 17 deletions clang/include/clang/Basic/AttrDocs.td
Original file line number Diff line number Diff line change
Expand Up @@ -3476,30 +3476,16 @@ code. See `cl_intel_required_subgroup_size
for details.

SYCL documentation:
The [[cl::intel_reqd_sub_group_size(n)]] and [[intel::reqd_sub_group_size(n)]]
attribute indicates that the kernel must be compiled and executed with a
sub-group of size n. The value of n must be set to a sub-group size supported
by the device, or device compilation will fail.
The [[intel::reqd_sub_group_size(n)]] attribute indicates that the kernel must
be compiled and executed with a sub-group of size n. The value of n must be set
to a sub-group size supported by the device, or device compilation will fail.

In addition to device functions, the required sub-group size attribute may also
be specified in the definition of a named functor object or a lambda function,
as in the examples below:

.. code-block:: c++

class Functor
{
void operator()(item<1> item) [[cl::intel_reqd_sub_group_size(16)]]
{
/* kernel code */
}
}

kernel<class kernel_name>(
[]() [[cl::intel_reqd_sub_group_size(n)]] {
/* kernel code */
});

class Functor
{
[[intel::reqd_sub_group_size(16)]] void operator()(item<1> item)
Expand Down
5 changes: 0 additions & 5 deletions clang/include/clang/Basic/DiagnosticSemaKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -11063,11 +11063,6 @@ def err_ivdep_declrefexpr_arg : Error<
def warn_ivdep_redundant : Warning <"ignoring redundant Intel FPGA loop "
"attribute 'ivdep': safelen %select{INF|%1}0 >= safelen %select{INF|%3}2">,
InGroup<IgnoredAttributes>;
def warn_attribute_spelling_deprecated : Warning<
"attribute %0 is deprecated">,
InGroup<DeprecatedAttributes>;
def note_spelling_suggestion : Note<
"did you mean to use %0 instead?">;

// errors of expect.with.probability
def err_probability_not_constant_float : Error<
Expand Down
7 changes: 0 additions & 7 deletions clang/lib/Sema/SemaDeclAttr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3013,13 +3013,6 @@ static void handleSubGroupSize(Sema &S, Decl *D, const ParsedAttr &AL) {
if (D->getAttr<IntelReqdSubGroupSizeAttr>())
S.Diag(AL.getLoc(), diag::warn_duplicate_attribute) << AL;

if (AL.getAttributeSpellingListIndex() ==
IntelReqdSubGroupSizeAttr::CXX11_cl_intel_reqd_sub_group_size) {
S.Diag(AL.getLoc(), diag::warn_attribute_spelling_deprecated) << AL;
S.Diag(AL.getLoc(), diag::note_spelling_suggestion)
<< "'intel::reqd_sub_group_size'";
}

S.addIntelReqdSubGroupSizeAttr(D, AL, E);
}

Expand Down
8 changes: 4 additions & 4 deletions clang/test/CodeGenSYCL/reqd-sub-group-size.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

class Functor16 {
public:
[[cl::intel_reqd_sub_group_size(16)]] void operator()() const {}
[[intel::reqd_sub_group_size(16)]] void operator()() const {}
};

[[cl::intel_reqd_sub_group_size(8)]] void foo() {}
[[intel::reqd_sub_group_size(8)]] void foo() {}

class Functor {
public:
Expand All @@ -17,7 +17,7 @@ class Functor {
template <int SIZE>
class Functor5 {
public:
[[cl::intel_reqd_sub_group_size(SIZE)]] void operator()() const {}
[[intel::reqd_sub_group_size(SIZE)]] void operator()() const {}
};

template <typename name, typename Func>
Expand All @@ -33,7 +33,7 @@ void bar() {
kernel<class kernel_name2>(f);

kernel<class kernel_name3>(
[]() [[cl::intel_reqd_sub_group_size(4)]] {});
[]() [[intel::reqd_sub_group_size(4)]]{});

Functor5<2> f5;
kernel<class kernel_name4>(f5);
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/sycl-multi-kernel-attr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

class Functor {
public:
[[cl::intel_reqd_sub_group_size(4), cl::reqd_work_group_size(32, 16, 16)]] void operator()() const {}
[[intel::reqd_sub_group_size(4), cl::reqd_work_group_size(32, 16, 16)]] void operator()() const {}
};

template <typename Name, typename Func>
Expand Down
8 changes: 2 additions & 6 deletions clang/test/SemaSYCL/reqd-sub-group-size-device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,7 @@

class Functor16 {
public:
// expected-warning@+2 {{attribute 'intel_reqd_sub_group_size' is deprecated}}
// expected-note@+1 {{did you mean to use 'intel::reqd_sub_group_size' instead?}}
[[cl::intel_reqd_sub_group_size(16)]] void operator()() const {}
[[intel::reqd_sub_group_size(16)]] void operator()() const {}
};

class Functor8 { // expected-error {{conflicting attributes applied to a SYCL kernel}}
Expand Down Expand Up @@ -55,9 +53,7 @@ void bar() {

kernel<class kernel_name5>([]() [[intel::reqd_sub_group_size(2)]]{});
kernel<class kernel_name6>([]() [[intel::reqd_sub_group_size(4)]] { foo(); });
// expected-warning@+2 {{attribute 'intel_reqd_sub_group_size' is deprecated}}
// expected-note@+1 {{did you mean to use 'intel::reqd_sub_group_size' instead?}}
kernel<class kernel_name7>([]() [[cl::intel_reqd_sub_group_size(6)]]{});
kernel<class kernel_name7>([]() [[intel::reqd_sub_group_size(6)]]{});

Functor4 f4;
kernel<class kernel_name8>(f4);
Expand Down
4 changes: 2 additions & 2 deletions clang/test/SemaSYCL/reqd-sub-group-size-host.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
// RUN: %clang_cc1 -fsycl -fsycl-is-host -fsyntax-only -verify %s
// expected-no-diagnostics

[[cl::intel_reqd_sub_group_size(8)]] void fun() {}
[[intel::reqd_sub_group_size(8)]] void fun() {}

class Functor {
public:
[[cl::intel_reqd_sub_group_size(16)]] void operator()() {}
[[intel::reqd_sub_group_size(16)]] void operator()() {}
};
5 changes: 4 additions & 1 deletion sycl/test/inline-asm/asm_16_empty.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ struct KernelFunctor : WithOutputBuffer<T> {
void operator()(cl::sycl::handler &cgh) {
auto C = this->getOutputBuffer().template get_access<cl::sycl::access::mode::write>(cgh);
cgh.parallel_for<KernelFunctor<T>>(
cl::sycl::range<1>{this->getOutputBufferSize()}, [=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(16)]] {
// clang-format off
cl::sycl::range<1>{this->getOutputBufferSize()},
[=](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(16)]] {
// clang-format on
C[wiID] = 43;
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
asm volatile("");
Expand Down
5 changes: 4 additions & 1 deletion sycl/test/inline-asm/asm_16_matrix_mult.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ struct KernelFunctor : WithOutputBuffer<T> {
void operator()(cl::sycl::handler &cgh) {
auto C = this->getOutputBuffer().template get_access<cl::sycl::access::mode::write>(cgh);
cgh.parallel_for<KernelFunctor<T>>(
cl::sycl::range<1>{this->getOutputBufferSize()}, [=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(16)]] {
// clang-format off
cl::sycl::range<1>{this->getOutputBufferSize()},
[=](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(16)]] {
// clang-format on
volatile int output = 0;
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
asm volatile("mov (M1,16) %0(0,0)<1> 0x7:d"
Expand Down
5 changes: 4 additions & 1 deletion sycl/test/inline-asm/asm_16_no_input_int.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ struct KernelFunctor : WithOutputBuffer<T> {
void operator()(cl::sycl::handler &cgh) {
auto C = this->getOutputBuffer().template get_access<cl::sycl::access::mode::write>(cgh);
cgh.parallel_for<KernelFunctor<T>>(
cl::sycl::range<1>{this->getOutputBufferSize()}, [=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(16)]] {
// clang-format off
cl::sycl::range<1>{this->getOutputBufferSize()},
[=](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(16)]] {
// clang-format on
volatile int output = 0;
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
asm volatile("mov (M1,16) %0(0,0)<1> 0x7:d"
Expand Down
5 changes: 4 additions & 1 deletion sycl/test/inline-asm/asm_16_no_opts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ struct KernelFunctor : WithOutputBuffer<T> {
void operator()(cl::sycl::handler &cgh) {
auto C = this->getOutputBuffer().template get_access<cl::sycl::access::mode::write>(cgh);
cgh.parallel_for<KernelFunctor<T>>(
cl::sycl::range<1>{this->getOutputBufferSize()}, [=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(16)]] {
// clang-format off
cl::sycl::range<1>{this->getOutputBufferSize()},
[=](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(16)]] {
// clang-format on
for (int i = 0; i < 10; ++i) {
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
asm("fence_sw");
Expand Down
5 changes: 4 additions & 1 deletion sycl/test/inline-asm/asm_8_empty.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ struct KernelFunctor : WithOutputBuffer<T> {
void operator()(cl::sycl::handler &cgh) {
auto C = this->getOutputBuffer().template get_access<cl::sycl::access::mode::write>(cgh);
cgh.parallel_for<KernelFunctor<T>>(
cl::sycl::range<1>{this->getOutputBufferSize()}, [=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(8)]] {
// clang-format off
cl::sycl::range<1>{this->getOutputBufferSize()},
[=](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
// clang-format on
C[wiID] = 43;
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
asm volatile("");
Expand Down
5 changes: 4 additions & 1 deletion sycl/test/inline-asm/asm_8_no_input_int.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ struct KernelFunctor : WithOutputBuffer<T> {
void operator()(cl::sycl::handler &cgh) {
auto C = this->getOutputBuffer().template get_access<cl::sycl::access::mode::write>(cgh);
cgh.parallel_for<KernelFunctor<T>>(
cl::sycl::range<1>{this->getOutputBufferSize()}, [=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(8)]] {
// clang-format off
cl::sycl::range<1>{this->getOutputBufferSize()},
[=](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
// clang-format on
volatile int output = 0;
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
asm volatile("mov (M1,8) %0(0,0)<1> 0x7:d"
Expand Down
2 changes: 1 addition & 1 deletion sycl/test/inline-asm/asm_arbitrary_ops_order.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ struct KernelFunctor : WithInputBuffers<T, 3>, WithOutputBuffer<T> {

cgh.parallel_for<KernelFunctor<T>>(
cl::sycl::range<1>{this->getOutputBufferSize()}, [=
](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(8)]] {
](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
asm("mad (M1, 8) %0(0, 0)<1> %3(0, 0)<1;1,0> %1(0, 0)<1;1,0> %2(0, 0)<1;1,0>"
: "=rw"(D[wiID])
Expand Down
4 changes: 3 additions & 1 deletion sycl/test/inline-asm/asm_decl_in_scope.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,10 @@ struct KernelFunctor : WithInputBuffers<T, 2>, WithOutputBuffer<T> {
auto C = this->getOutputBuffer().template get_access<cl::sycl::access::mode::write>(cgh);

cgh.parallel_for<KernelFunctor<T>>(
// clang-format off
cl::sycl::range<1>{this->getOutputBufferSize()},
[=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(16)]] {
[=](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(16)]] {
// clang-format on
// declaration of temp within and outside the scope
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
asm("{\n"
Expand Down
5 changes: 4 additions & 1 deletion sycl/test/inline-asm/asm_float_add.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,10 @@ struct KernelFunctor : WithInputBuffers<T, 2>, WithOutputBuffer<T> {
auto C = this->getOutputBuffer().template get_access<cl::sycl::access::mode::write>(cgh);

cgh.parallel_for<KernelFunctor<T>>(
cl::sycl::range<1>{this->getOutputBufferSize()}, [=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(8)]] {
// clang-format off
cl::sycl::range<1>{this->getOutputBufferSize()},
[=](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
// clang-format on
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
asm("add (M1, 8) %0(0, 0)<1> %1(0, 0)<1;1,0> %2(0, 0)<1;1,0>"
: "=rw"(C[wiID])
Expand Down
2 changes: 1 addition & 1 deletion sycl/test/inline-asm/asm_float_imm_arg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ struct KernelFunctor : WithInputBuffers<T, 1>, WithOutputBuffer<T> {

cgh.parallel_for<KernelFunctor<T>>(
cl::sycl::range<1>{this->getOutputBufferSize()}, [=
](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(8)]] {
](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
asm("mul (M1, 8) %0(0, 0)<1> %1(0, 0)<1;1,0> %2"
: "=rw"(B[wiID])
Expand Down
5 changes: 4 additions & 1 deletion sycl/test/inline-asm/asm_float_neg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@ struct KernelFunctor : WithInputBuffers<T, 1>, WithOutputBuffer<T> {
auto B = this->getOutputBuffer().template get_access<cl::sycl::access::mode::write>(cgh);

cgh.parallel_for<KernelFunctor<T>>(
cl::sycl::range<1>{this->getOutputBufferSize()}, [=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(8)]] {
// clang-format off
cl::sycl::range<1>{this->getOutputBufferSize()},
[=](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
// clang-format on
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
asm("mov (M1, 8) %0(0, 0)<1> (-)%1(0, 0)<1;1,0>"
: "=rw"(B[wiID])
Expand Down
2 changes: 1 addition & 1 deletion sycl/test/inline-asm/asm_if.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ template <typename T = DataType> struct KernelFunctor : WithOutputBuffer<T> {
// clang-format off
CGH.parallel_for<KernelFunctor<T>>(
cl::sycl::range<1>{this->getOutputBufferSize()},
[=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(8)]] {
[=](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
// clang-format on
int Output = 0;
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
Expand Down
2 changes: 1 addition & 1 deletion sycl/test/inline-asm/asm_imm_arg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ struct KernelFunctor : WithInputBuffers<T, 1>, WithOutputBuffer<T> {

cgh.parallel_for<KernelFunctor<T>>(
cl::sycl::range<1>{this->getOutputBufferSize()}, [=
](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(8)]] {
](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
asm("add (M1, 8) %0(0, 0)<1> %1(0, 0)<1;1,0> %2"
: "=rw"(B[wiID])
Expand Down
2 changes: 1 addition & 1 deletion sycl/test/inline-asm/asm_loop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ struct KernelFunctor : WithInputBuffers<T, 2>, WithOutputBuffer<T> {
// clang-format off
CGH.parallel_for<KernelFunctor<T>>(
cl::sycl::range<1>{this->getOutputBufferSize()},
[=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(8)]] {
[=](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
// clang-format on
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
asm volatile(".decl P1 v_type=P num_elts=8\n"
Expand Down
5 changes: 4 additions & 1 deletion sycl/test/inline-asm/asm_mul.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@ struct KernelFunctor : WithInputBuffers<T, 2>, WithOutputBuffer<T> {
auto C = this->getOutputBuffer().template get_access<cl::sycl::access::mode::write>(cgh);

cgh.parallel_for<KernelFunctor<T>>(
cl::sycl::range<1>{this->getOutputBufferSize()}, [=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(8)]] {
// clang-format off
cl::sycl::range<1>{this->getOutputBufferSize()},
[=](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
// clang-format on
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
asm("mul (M1, 8) %0(0, 0)<1> %1(0, 0)<1;1,0> %2(0, 0)<1;1,0>"
: "=rw"(C[wiID])
Expand Down
5 changes: 4 additions & 1 deletion sycl/test/inline-asm/asm_multiple_instructions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,10 @@ struct KernelFunctor : WithInputBuffers<T, 3>, WithOutputBuffer<T> {
auto D = this->getOutputBuffer().template get_access<cl::sycl::access::mode::write>(cgh);

cgh.parallel_for<KernelFunctor<T>>(
cl::sycl::range<1>{this->getOutputBufferSize()}, [=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(8)]] {
// clang-format off
cl::sycl::range<1>{this->getOutputBufferSize()},
[=](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
// clang-format on
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
asm("{\n"
"add (M1, 8) %1(0, 0)<1> %1(0, 0)<1;1,0> %2(0, 0)<1;1,0>\n"
Expand Down
5 changes: 4 additions & 1 deletion sycl/test/inline-asm/asm_no_operands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,13 @@ int main() {
Queue.submit([&](cl::sycl::handler &cgh) {
// Executing kernel
cgh.parallel_for<no_operands_kernel>(
NumOfWorkItems, [=](cl::sycl::id<1> WIid) [[cl::intel_reqd_sub_group_size(8)]] {
NumOfWorkItems, [=](cl::sycl::id<1> WIid)
[[intel::reqd_sub_group_size(8)]] {
// clang-format off
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
asm("barrier");
#endif
});
// clang-format on
});
}
5 changes: 4 additions & 1 deletion sycl/test/inline-asm/asm_no_output.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ struct KernelFunctor : WithOutputBuffer<T> {
void operator()(cl::sycl::handler &cgh) {
auto C = this->getOutputBuffer().template get_access<cl::sycl::access::mode::write>(cgh);
cgh.parallel_for<KernelFunctor<T>>(
cl::sycl::range<1>{this->getOutputBufferSize()}, [=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(8)]] {
// clang-format off
cl::sycl::range<1>{this->getOutputBufferSize()},
[=](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
// clang-format on
volatile int local_var = 47;
local_var += C[0];
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
Expand Down
5 changes: 4 additions & 1 deletion sycl/test/inline-asm/asm_plus_mod.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@ struct KernelFunctor : WithInputBuffers<T, 1>, WithOutputBuffer<T> {
auto B = this->getOutputBuffer().template get_access<cl::sycl::access::mode::write>(cgh);

cgh.parallel_for<KernelFunctor<T>>(
cl::sycl::range<1>{this->getOutputBufferSize()}, [=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(16)]] {
// clang-format off
cl::sycl::range<1>{this->getOutputBufferSize()},
[=](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(16)]] {
// clang-format on
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
asm("add (M1, 16) %0(0, 0)<1> %0(0, 0)<1;1,0> %1(0, 0)<1;1,0>"
: "+rw"(B[wiID])
Expand Down
2 changes: 1 addition & 1 deletion sycl/test/inline-asm/asm_switch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ template <typename T = DataType> struct KernelFunctor : WithOutputBuffer<T> {
// clang-format off
CGH.parallel_for<KernelFunctor<T>>(
cl::sycl::range<1>{this->getOutputBufferSize()},
[=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(8)]] {
[=](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
// clang-format on
int Output = 0;
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
Expand Down
8 changes: 6 additions & 2 deletions sycl/test/inline-asm/letter_example.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,10 @@ int main() {
}
q.submit([&](cl::sycl::handler &cgh) {
cgh.parallel_for<kernel_name>(
cl::sycl::range<1>(problem_size), [=](cl::sycl::id<1> idx)
[[cl::intel_reqd_sub_group_size(16)]] {
// clang-format off
cl::sycl::range<1>(problem_size),
[=](cl::sycl::id<1> idx) [[intel::reqd_sub_group_size(16)]] {
// clang-format on
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
int i = idx[0];
asm volatile("{\n.decl V52 v_type=G type=d num_elts=16 align=GRF\n"
Expand All @@ -39,7 +41,9 @@ int main() {
:
: "rw"(&a[i]));
#else
// clang-format off
a[idx[0]]++;
// clang-format on
#endif
});
Comment on lines +44 to 48
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

clang-format makes an unrelated change to these lines, so I have turned formatting off around them with `// clang-format off` / `// clang-format on`.

}).wait();
Expand Down
Loading