intel
diff --git a/‎SYCL/Basic/half_builtins.cpp
Lines changed: 2 additions & 1 deletion b/‎SYCL/Basic/half_builtins.cpp
Lines changed: 2 additions & 1 deletion
diff --git a/‎SYCL/DeviceLib/built-ins/ext_native_math.cpp
Lines changed: 184 additions & 0 deletions b/‎SYCL/DeviceLib/built-ins/ext_native_math.cpp
Lines changed: 184 additions & 0 deletions
diff --git a/‎SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp
Lines changed: 169 additions & 0 deletions b/‎SYCL/DeviceLib/half_precision_math_test_marray_vec.cpp
Lines changed: 169 additions & 0 deletions
@@ -166,6 +166,7 @@ template <int N> bool check(vec<float, N> a, vec<float, N> b) {
 
 int main() {
   queue q;
+  if (q.get_device().has(sycl::aspect::fp16)) {
   float16 a, b, c, d;
   for (int i = 0; i < SZ_max; i++) {
     a[i] = i / (float)SZ_max;
@@ -193,6 +194,6 @@ int main() {
     });
   }
   assert(err == 0);
-
+}
   return 0;
 }
@@ -0,0 +1,184 @@
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-device-code-split=per_kernel %s -o %t.out
+// RUN: %HOST_RUN_PLACEHOLDER %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+// RUN: %ACC_RUN_PLACEHOLDER %t.out
+
+// The OpenCL CPU driver does not support the cl_khr_fp16 extension; for this
+// reason the test is compiled with the -fsycl-device-code-split flag.
+
+#include <CL/sycl.hpp>
+#include <cassert>
+
+// Asserts that every element of `val` lies strictly between the matching
+// elements of `lower` and `upper`. Despite the name, the assertion fires when
+// a value is OUT of the (lower, upper) range.
+template <typename T, size_t N>
+void assert_out_of_bound(sycl::marray<T, N> val, sycl::marray<T, N> lower,
+                         sycl::marray<T, N> upper) {
+  // Index with size_t so the comparison against N is not signed/unsigned.
+  for (size_t i = 0; i < N; i++) {
+    assert(lower[i] < val[i] && val[i] < upper[i]);
+  }
+}
+
+// Generic overload (used for the vector types such as sycl::float2): asserts
+// that the element-wise test lower < val < upper holds for all elements.
+template <typename T> void assert_out_of_bound(T val, T lower, T upper) {
+  assert(sycl::all((val > lower) && (val < upper)));
+}
+
+// Scalar float specialization: asserts that val lies strictly inside
+// (lower, upper).
+template <>
+void assert_out_of_bound<float>(float val, float lower, float upper) {
+  assert(val > lower && val < upper);
+}
+
+// Scalar sycl::half specialization: asserts that val lies strictly inside
+// (lower, upper).
+template <>
+void assert_out_of_bound<sycl::half>(sycl::half val, sycl::half lower,
+                                     sycl::half upper) {
+  assert(val > lower && val < upper);
+}
+
+// Applies sycl::ext::oneapi::experimental::native::tanh to `val` inside a
+// single_task kernel and asserts that the result lies strictly between `lo`
+// and `up`.
+//   q   - queue the kernel is submitted to
+//   val - input value; a local copy is overwritten with the kernel result
+//   lo  - exclusive lower bound for the result
+//   up  - exclusive upper bound for the result
+// When SYCL_EXT_ONEAPI_NATIVE_MATH is not defined the function asserts
+// unconditionally.
+template <typename T>
+void native_tanh_tester(sycl::queue q, T val, T lo, T up) {
+  T r = val;
+
+#ifdef SYCL_EXT_ONEAPI_NATIVE_MATH
+  {
+    // The buffer lives in its own scope so that it is destroyed, and `r`
+    // holds the kernel result, before the bounds check below.
+    sycl::buffer<T, 1> BufR(&r, sycl::range<1>(1));
+    q.submit([&](sycl::handler &cgh) {
+      auto AccR = BufR.template get_access<sycl::access::mode::read_write>(cgh);
+      cgh.single_task([=]() {
+        AccR[0] = sycl::ext::oneapi::experimental::native::tanh(AccR[0]);
+      });
+    });
+  }
+
+  // assert_out_of_bound takes (value, lower, upper).
+  assert_out_of_bound(r, lo, up);
+#else
+  assert(!"SYCL_EXT_ONEAPI_NATIVE_MATH not supported");
+#endif
+}
+
+// Applies sycl::ext::oneapi::experimental::native::exp2 to `val` inside a
+// single_task kernel and asserts that the result lies strictly between `lo`
+// and `up`.
+//   q   - queue the kernel is submitted to
+//   val - input value; a local copy is overwritten with the kernel result
+//   lo  - exclusive lower bound for the result
+//   up  - exclusive upper bound for the result
+// When SYCL_EXT_ONEAPI_NATIVE_MATH is not defined the function asserts
+// unconditionally.
+template <typename T>
+void native_exp2_tester(sycl::queue q, T val, T lo, T up) {
+  T r = val;
+
+#ifdef SYCL_EXT_ONEAPI_NATIVE_MATH
+  {
+    // The buffer lives in its own scope so that it is destroyed, and `r`
+    // holds the kernel result, before the bounds check below.
+    sycl::buffer<T, 1> BufR(&r, sycl::range<1>(1));
+    q.submit([&](sycl::handler &cgh) {
+      auto AccR = BufR.template get_access<sycl::access::mode::read_write>(cgh);
+      cgh.single_task([=]() {
+        AccR[0] = sycl::ext::oneapi::experimental::native::exp2(AccR[0]);
+      });
+    });
+  }
+
+  // assert_out_of_bound takes (value, lower, upper).
+  assert_out_of_bound(r, lo, up);
+#else
+  assert(!"SYCL_EXT_ONEAPI_NATIVE_MATH not supported");
+#endif
+}
+
+// Exercises native::tanh for float scalars, vectors and marrays, and then,
+// only when the device reports the fp16 aspect, native::tanh and native::exp2
+// for the corresponding sycl::half types.
+int main() {
+
+  sycl::queue q;
+
+  // tv holds the tanh inputs; tl and tu hold the exclusive lower and upper
+  // bounds that the result for tv[i] must fall between.
+  const double tv[16] = {-2.0, -1.5, -1.0, 0.0, 2.0,  1.5, 1.0,   0.0,
+                         -1.7, 1.7,  -1.2, 1.2, -3.0, 3.0, -10.0, 10.0};
+  const double tl[16] = {-0.97, -0.91, -0.77, -0.1, 0.95, 0.89, 0.75,  -0.1,
+                         -0.94, 0.92,  -0.84, 0.82, -1.0, 0.98, -1.10, 0.98};
+  const double tu[16] = {-0.95, -0.89, -0.75, 0.1,  0.97,  0.91, 0.77,  0.1,
+                         -0.92, 0.94,  -0.82, 0.84, -0.98, 1.00, -0.98, 1.10};
+
+  native_tanh_tester<float>(q, tv[0], tl[0], tu[0]);
+  native_tanh_tester<sycl::float2>(q, {tv[0], tv[1]}, {tl[0], tl[1]},
+                                   {tu[0], tu[1]});
+  native_tanh_tester<sycl::float3>(
+      q, {tv[0], tv[1], tv[2]}, {tl[0], tl[1], tl[2]}, {tu[0], tu[1], tu[2]});
+
+  native_tanh_tester<sycl::float4>(q, {tv[0], tv[1], tv[2], tv[3]},
+                                   {tl[0], tl[1], tl[2], tl[3]},
+                                   {tu[0], tu[1], tu[2], tu[3]});
+  native_tanh_tester<sycl::marray<float, 3>>(
+      q, {tv[0], tv[1], tv[2]}, {tl[0], tl[1], tl[2]}, {tu[0], tu[1], tu[2]});
+  // The marray<float, 4> case runs once; an identical second invocation was
+  // redundant and has been dropped.
+  native_tanh_tester<sycl::marray<float, 4>>(q, {tv[0], tv[1], tv[2], tv[3]},
+                                             {tl[0], tl[1], tl[2], tl[3]},
+                                             {tu[0], tu[1], tu[2], tu[3]});
+  native_tanh_tester<sycl::float8>(
+      q, {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7]},
+      {tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7]},
+      {tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7]});
+  native_tanh_tester<sycl::float16>(
+      q,
+      {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7], tv[8], tv[9],
+       tv[10], tv[11], tv[12], tv[13], tv[14], tv[15]},
+      {tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7], tl[8], tl[9],
+       tl[10], tl[11], tl[12], tl[13], tl[14], tl[15]},
+      {tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7], tu[8], tu[9],
+       tu[10], tu[11], tu[12], tu[13], tu[14], tu[15]});
+
+  // The sycl::half variants are only run on devices that report fp16.
+  if (q.get_device().has(sycl::aspect::fp16)) {
+
+    native_tanh_tester<sycl::half>(q, tv[0], tl[0], tu[0]);
+    native_tanh_tester<sycl::half2>(q, {tv[0], tv[1]}, {tl[0], tl[1]},
+                                    {tu[0], tu[1]});
+    native_tanh_tester<sycl::half3>(
+        q, {tv[0], tv[1], tv[2]}, {tl[0], tl[1], tl[2]}, {tu[0], tu[1], tu[2]});
+    native_tanh_tester<sycl::marray<sycl::half, 3>>(
+        q, {tv[0], tv[1], tv[2]}, {tl[0], tl[1], tl[2]}, {tu[0], tu[1], tu[2]});
+    native_tanh_tester<sycl::half4>(q, {tv[0], tv[1], tv[2], tv[3]},
+                                    {tl[0], tl[1], tl[2], tl[3]},
+                                    {tu[0], tu[1], tu[2], tu[3]});
+    native_tanh_tester<sycl::marray<sycl::half, 4>>(
+        q, {tv[0], tv[1], tv[2], tv[3]}, {tl[0], tl[1], tl[2], tl[3]},
+        {tu[0], tu[1], tu[2], tu[3]});
+    native_tanh_tester<sycl::half8>(
+        q, {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7]},
+        {tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7]},
+        {tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7]});
+    native_tanh_tester<sycl::half16>(
+        q,
+        {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7], tv[8], tv[9],
+         tv[10], tv[11], tv[12], tv[13], tv[14], tv[15]},
+        {tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7], tl[8], tl[9],
+         tl[10], tl[11], tl[12], tl[13], tl[14], tl[15]},
+        {tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7], tu[8], tu[9],
+         tu[10], tu[11], tu[12], tu[13], tu[14], tu[15]});
+
+    // ev holds the exp2 inputs; el and eu hold the exclusive lower and upper
+    // bounds that the result for ev[i] must fall between.
+    const double ev[16] = {-2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0,
+                           -2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0};
+    const double el[16] = {0.1, 0.34, 0.4, -0.9, 3.9, 2.7, 1.9, -0.9,
+                           0.1, 0.34, 0.4, -0.9, 3.9, 2.7, 1.9, -0.9};
+    const double eu[16] = {0.3, 0.36, 0.6, 1.1, 4.1, 2.9, 2.1, 1.1,
+                           0.3, 0.36, 0.6, 1.1, 4.1, 2.9, 2.1, 1.1};
+
+    native_exp2_tester<sycl::half>(q, ev[0], el[0], eu[0]);
+    native_exp2_tester<sycl::half2>(q, {ev[0], ev[1]}, {el[0], el[1]},
+                                    {eu[0], eu[1]});
+    native_exp2_tester<sycl::half3>(
+        q, {ev[0], ev[1], ev[2]}, {el[0], el[1], el[2]}, {eu[0], eu[1], eu[2]});
+    native_exp2_tester<sycl::half4>(q, {ev[0], ev[1], ev[2], ev[3]},
+                                    {el[0], el[1], el[2], el[3]},
+                                    {eu[0], eu[1], eu[2], eu[3]});
+    native_exp2_tester<sycl::marray<sycl::half, 3>>(
+        q, {ev[0], ev[1], ev[2]}, {el[0], el[1], el[2]}, {eu[0], eu[1], eu[2]});
+    native_exp2_tester<sycl::marray<sycl::half, 4>>(
+        q, {ev[0], ev[1], ev[2], ev[3]}, {el[0], el[1], el[2], el[3]},
+        {eu[0], eu[1], eu[2], eu[3]});
+    native_exp2_tester<sycl::half8>(
+        q, {ev[0], ev[1], ev[2], ev[3], ev[4], ev[5], ev[6], ev[7]},
+        {el[0], el[1], el[2], el[3], el[4], el[5], el[6], el[7]},
+        {eu[0], eu[1], eu[2], eu[3], eu[4], eu[5], eu[6], eu[7]});
+    native_exp2_tester<sycl::half16>(
+        q,
+        {ev[0], ev[1], ev[2], ev[3], ev[4], ev[5], ev[6], ev[7], ev[8], ev[9],
+         ev[10], ev[11], ev[12], ev[13], ev[14], ev[15]},
+        {el[0], el[1], el[2], el[3], el[4], el[5], el[6], el[7], el[8], el[9],
+         el[10], el[11], el[12], el[13], el[14], el[15]},
+        {eu[0], eu[1], eu[2], eu[3], eu[4], eu[5], eu[6], eu[7], eu[8], eu[9],
+         eu[10], eu[11], eu[12], eu[13], eu[14], eu[15]});
+  }
+
+  return 0;
+}
@@ -0,0 +1,169 @@
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+// RUN: %HOST_RUN_PLACEHOLDER %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %ACC_RUN_PLACEHOLDER %t.out
+
+#include <sycl/sycl.hpp>
+
+using namespace sycl;
+
+template <typename T1, typename T2> class TypeHelper;
+
+// Returns true when the x, y and z components of `A` all equal `B` after `B`
+// has been converted to the element type T.
+template <typename T> bool checkEqual(vec<T, 3> A, size_t B) {
+  const T Expected = static_cast<T>(B);
+  if (A.x() != Expected)
+    return false;
+  if (A.y() != Expected)
+    return false;
+  return A.z() == Expected;
+}
+
+// Returns true when the x, y, z and w components of `A` all equal `B` after
+// `B` has been converted to the element type T.
+template <typename T> bool checkEqual(vec<T, 4> A, size_t B) {
+  const T Expected = static_cast<T>(B);
+  if (A.x() != Expected)
+    return false;
+  if (A.y() != Expected)
+    return false;
+  if (A.z() != Expected)
+    return false;
+  return A.w() == Expected;
+}
+
+// Returns true when every element of the marray `A` equals `B` after `B` has
+// been converted to the element type T.
+template <typename T, size_t N> bool checkEqual(marray<T, N> A, size_t B) {
+  // Convert the reference value once, and index with size_t so the comparison
+  // against N is not signed/unsigned.
+  const T TB = B;
+  for (size_t i = 0; i < N; i++) {
+    if (A[i] != TB) {
+      return false;
+    }
+  }
+  return true;
+}
+
+#define COMMA ,
+
+// Defines half_precision_math_test_<NAME>(deviceQueue, result, input, ref).
+// The generated function runs sycl::half_precision::NAME on `input` inside a
+// single_task kernel, stores the value in `result`, and asserts that
+// checkEqual(result, ref) holds. The input buffer is only read by the kernel,
+// so its accessor uses read mode; only the result accessor is writable.
+#define HALF_PRECISION_OPERATOR(NAME)                                          \
+  template <typename T>                                                        \
+  void half_precision_math_test_##NAME(queue &deviceQueue, T result, T input,  \
+                                       size_t ref) {                           \
+    {                                                                          \
+      buffer<T, 1> buffer1(&result, 1);                                        \
+      buffer<T, 1> buffer2(&input, 1);                                         \
+      deviceQueue.submit([&](handler &cgh) {                                   \
+        accessor<T, 1, access::mode::write, target::device> res_access(        \
+            buffer1, cgh);                                                     \
+        accessor<T, 1, access::mode::read, target::device> input_access(       \
+            buffer2, cgh);                                                     \
+        cgh.single_task<TypeHelper<class half_precision##NAME, T>>([=]() {     \
+          res_access[0] = sycl::half_precision::NAME(input_access[0]);         \
+        });                                                                    \
+      });                                                                      \
+    }                                                                          \
+    assert(checkEqual(result, ref));                                           \
+  }
+
+HALF_PRECISION_OPERATOR(sin)
+HALF_PRECISION_OPERATOR(tan)
+HALF_PRECISION_OPERATOR(cos)
+HALF_PRECISION_OPERATOR(exp)
+HALF_PRECISION_OPERATOR(exp2)
+HALF_PRECISION_OPERATOR(exp10)
+HALF_PRECISION_OPERATOR(log)
+HALF_PRECISION_OPERATOR(log2)
+HALF_PRECISION_OPERATOR(log10)
+HALF_PRECISION_OPERATOR(sqrt)
+HALF_PRECISION_OPERATOR(rsqrt)
+HALF_PRECISION_OPERATOR(recip)
+
+#undef HALF_PRECISION_OPERATOR
+
+// Defines half_precision_math_test_2_<NAME>(deviceQueue, result, input1,
+// input2, ref). The generated function runs sycl::half_precision::NAME on the
+// two inputs inside a single_task kernel, stores the value in `result`, and
+// asserts that checkEqual(result, ref) holds. Both input buffers are only
+// read by the kernel, so their accessors use read mode; only the result
+// accessor is writable.
+#define HALF_PRECISION_OPERATOR_2(NAME)                                        \
+  template <typename T>                                                        \
+  void half_precision_math_test_2_##NAME(queue &deviceQueue, T result,         \
+                                         T input1, T input2, size_t ref) {     \
+    {                                                                          \
+      buffer<T, 1> buffer1(&result, 1);                                        \
+      buffer<T, 1> buffer2(&input1, 1);                                        \
+      buffer<T, 1> buffer3(&input2, 1);                                        \
+      deviceQueue.submit([&](handler &cgh) {                                   \
+        accessor<T, 1, access::mode::write, target::device> res_access(        \
+            buffer1, cgh);                                                     \
+        accessor<T, 1, access::mode::read, target::device> input1_access(      \
+            buffer2, cgh);                                                     \
+        accessor<T, 1, access::mode::read, target::device> input2_access(      \
+            buffer3, cgh);                                                     \
+        cgh.single_task<TypeHelper<class half_precision2##NAME, T>>([=]() {    \
+          res_access[0] =                                                      \
+              sycl::half_precision::NAME(input1_access[0], input2_access[0]);  \
+        });                                                                    \
+      });                                                                      \
+    }                                                                          \
+    assert(checkEqual(result, ref));                                           \
+  }
+
+HALF_PRECISION_OPERATOR_2(divide)
+HALF_PRECISION_OPERATOR_2(powr)
+
+#undef HALF_PRECISION_OPERATOR_2
+
+// Expands to one call of every half_precision_math_test_* helper for a
+// three-element TYPE. Each call passes `deviceQueue` (which must exist in the
+// scope where the macro is used), a result placeholder filled with -1, the
+// input value(s), and the value every result element is expected to equal.
+// The expansion already ends each call with ';', so call sites add none.
+#define HALF_PRECISION_TESTS_3(TYPE)                                           \
+  half_precision_math_test_sin(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0},   \
+                               0);                                             \
+  half_precision_math_test_tan(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0},   \
+                               0);                                             \
+  half_precision_math_test_cos(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0},   \
+                               1);                                             \
+  half_precision_math_test_exp(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0},   \
+                               1);                                             \
+  half_precision_math_test_exp2(deviceQueue, TYPE{-1, -1, -1}, TYPE{2, 2, 2},  \
+                                4);                                            \
+  half_precision_math_test_exp10(deviceQueue, TYPE{-1, -1, -1}, TYPE{2, 2, 2}, \
+                                 100);                                         \
+  half_precision_math_test_log(deviceQueue, TYPE{-1, -1, -1}, TYPE{1, 1, 1},   \
+                               0);                                             \
+  half_precision_math_test_log2(deviceQueue, TYPE{-1, -1, -1}, TYPE{4, 4, 4},  \
+                                2);                                            \
+  half_precision_math_test_log10(deviceQueue, TYPE{-1, -1, -1},                \
+                                 TYPE{100, 100, 100}, 2);                      \
+  half_precision_math_test_sqrt(deviceQueue, TYPE{-1, -1, -1}, TYPE{4, 4, 4},  \
+                                2);                                            \
+  half_precision_math_test_rsqrt(deviceQueue, TYPE{-1, -1, -1},                \
+                                 TYPE{0.25, 0.25, 0.25}, 2);                   \
+  half_precision_math_test_recip(deviceQueue, TYPE{-1, -1, -1},                \
+                                 TYPE{0.25, 0.25, 0.25}, 4);                   \
+  half_precision_math_test_2_powr(deviceQueue, TYPE{-1, -1, -1},               \
+                                  TYPE{2, 2, 2}, TYPE{2, 2, 2}, 4);            \
+  half_precision_math_test_2_divide(deviceQueue, TYPE{-1, -1, -1},             \
+                                    TYPE{4, 4, 4}, TYPE{2, 2, 2}, 2);
+
+// Expands to one call of every half_precision_math_test_* helper for a
+// four-element TYPE. Each call passes `deviceQueue` (which must exist in the
+// scope where the macro is used), a result placeholder filled with -1, the
+// input value(s), and the value every result element is expected to equal.
+// The expansion already ends each call with ';', so call sites add none.
+#define HALF_PRECISION_TESTS_4(TYPE)                                           \
+  half_precision_math_test_sin(deviceQueue, TYPE{-1, -1, -1, -1},              \
+                               TYPE{0, 0, 0, 0}, 0);                           \
+  half_precision_math_test_tan(deviceQueue, TYPE{-1, -1, -1, -1},              \
+                               TYPE{0, 0, 0, 0}, 0);                           \
+  half_precision_math_test_cos(deviceQueue, TYPE{-1, -1, -1, -1},              \
+                               TYPE{0, 0, 0, 0}, 1);                           \
+  half_precision_math_test_exp(deviceQueue, TYPE{-1, -1, -1, -1},              \
+                               TYPE{0, 0, 0, 0}, 1);                           \
+  half_precision_math_test_exp2(deviceQueue, TYPE{-1, -1, -1, -1},             \
+                                TYPE{2, 2, 2, 2}, 4);                          \
+  half_precision_math_test_exp10(deviceQueue, TYPE{-1, -1, -1, -1},            \
+                                 TYPE{2, 2, 2, 2}, 100);                       \
+  half_precision_math_test_log(deviceQueue, TYPE{-1, -1, -1, -1},              \
+                               TYPE{1, 1, 1, 1}, 0);                           \
+  half_precision_math_test_log2(deviceQueue, TYPE{-1, -1, -1, -1},             \
+                                TYPE{4, 4, 4, 4}, 2);                          \
+  half_precision_math_test_log10(deviceQueue, TYPE{-1, -1, -1, -1},            \
+                                 TYPE{100, 100, 100, 100}, 2);                 \
+  half_precision_math_test_sqrt(deviceQueue, TYPE{-1, -1, -1, -1},             \
+                                TYPE{4, 4, 4, 4}, 2);                          \
+  half_precision_math_test_rsqrt(deviceQueue, TYPE{-1, -1, -1, -1},            \
+                                 TYPE{0.25, 0.25, 0.25, 0.25}, 2);             \
+  half_precision_math_test_recip(deviceQueue, TYPE{-1, -1, -1, -1},            \
+                                 TYPE{0.25, 0.25, 0.25, 0.25}, 4);             \
+  half_precision_math_test_2_powr(deviceQueue, TYPE{-1, -1, -1, -1},           \
+                                  TYPE{2, 2, 2, 2}, TYPE{2, 2, 2, 2}, 4);      \
+  half_precision_math_test_2_divide(deviceQueue, TYPE{-1, -1, -1, -1},         \
+                                    TYPE{4, 4, 4, 4}, TYPE{2, 2, 2, 2}, 2);
+
+// Runs the three- and four-element half_precision test suites for float3 /
+// float4 and for the marray types of the same sizes, then prints "Pass".
+int main() {
+  queue deviceQueue; // referenced by name inside the HALF_PRECISION_TESTS_* macros
+
+  HALF_PRECISION_TESTS_3(float3)
+  HALF_PRECISION_TESTS_3(marray<float COMMA 3>) // COMMA keeps the template args in one macro argument
+
+  HALF_PRECISION_TESTS_4(float4)
+  HALF_PRECISION_TESTS_4(marray<float COMMA 4>)
+
+  std::cout << "Pass" << std::endl;
+  return 0;
+}
Original file line number	Diff line number	Diff line change
`@@ -166,6 +166,7 @@ template <int N> bool check(vec<float, N> a, vec<float, N> b) {`
`166`	`166`
`167`	`167`	`int main() {`
`168`	`168`	`queue q;`
	`169`	`+ if (q.get_device().has(sycl::aspect::fp16)) {`
`169`	`170`	`float16 a, b, c, d;`
`170`	`171`	`for (int i = 0; i < SZ_max; i++) {`
`171`	`172`	`a[i] = i / (float)SZ_max;`
`@@ -193,6 +194,6 @@ int main() {`
`193`	`194`	`});`
`194`	`195`	`}`
`195`	`196`	`assert(err == 0);`
`196`		`-`
	`197`	`+}`
`197`	`198`	`return 0;`
`198`	`199`	`}`