Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit f95793b

Browse files
committed
tests for vec/marray math
sycl::math, native and half_precision cases covered. Signed-off-by: jack.kirk <[email protected]>
1 parent 7c83c66 commit f95793b

File tree

5 files changed

+792
-1
lines changed

5 files changed

+792
-1
lines changed

SYCL/Basic/half_builtins.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,7 @@ template <int N> bool check(vec<float, N> a, vec<float, N> b) {
166166

167167
int main() {
168168
queue q;
169+
if (q.get_device().has(sycl::aspect::fp16)) {
169170
float16 a, b, c, d;
170171
for (int i = 0; i < SZ_max; i++) {
171172
a[i] = i / (float)SZ_max;
@@ -193,6 +194,6 @@ int main() {
193194
});
194195
}
195196
assert(err == 0);
196-
197+
}
197198
return 0;
198199
}
Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-device-code-split=per_kernel %s -o %t.out
2+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
3+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
4+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
5+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
6+
7+
// OpenCL CPU driver does not support cl_khr_fp16 extension for this reason this
8+
// test is compiled with the -fsycl-device-code-split flag
9+
10+
#include <CL/sycl.hpp>
11+
#include <cassert>
12+
13+
template <typename T, size_t N>
14+
void assert_out_of_bound(sycl::marray<T, N> val, sycl::marray<T, N> lower,
15+
sycl::marray<T, N> upper) {
16+
for (int i = 0; i < N; i++) {
17+
assert(lower[i] < val[i] && val[i] < upper[i]);
18+
}
19+
}
20+
21+
template <typename T> void assert_out_of_bound(T val, T lower, T upper) {
22+
assert(sycl::all(lower < val && val < upper));
23+
}
24+
25+
template <>
26+
void assert_out_of_bound<float>(float val, float lower, float upper) {
27+
assert(lower < val && val < upper);
28+
}
29+
30+
template <>
31+
void assert_out_of_bound<sycl::half>(sycl::half val, sycl::half lower,
32+
sycl::half upper) {
33+
assert(lower < val && val < upper);
34+
}
35+
36+
template <typename T>
37+
void native_tanh_tester(sycl::queue q, T val, T up, T lo) {
38+
T r = val;
39+
40+
#ifdef SYCL_EXT_ONEAPI_NATIVE_MATH
41+
{
42+
sycl::buffer<T, 1> BufR(&r, sycl::range<1>(1));
43+
q.submit([&](sycl::handler &cgh) {
44+
auto AccR = BufR.template get_access<sycl::access::mode::read_write>(cgh);
45+
cgh.single_task([=]() {
46+
AccR[0] = sycl::ext::oneapi::experimental::native::tanh(AccR[0]);
47+
});
48+
});
49+
}
50+
51+
assert_out_of_bound(r, up, lo);
52+
#else
53+
assert(!"SYCL_EXT_ONEAPI_NATIVE_MATH not supported");
54+
#endif
55+
}
56+
57+
template <typename T>
58+
void native_exp2_tester(sycl::queue q, T val, T up, T lo) {
59+
T r = val;
60+
61+
#ifdef SYCL_EXT_ONEAPI_NATIVE_MATH
62+
{
63+
sycl::buffer<T, 1> BufR(&r, sycl::range<1>(1));
64+
q.submit([&](sycl::handler &cgh) {
65+
auto AccR = BufR.template get_access<sycl::access::mode::read_write>(cgh);
66+
cgh.single_task([=]() {
67+
AccR[0] = sycl::ext::oneapi::experimental::native::exp2(AccR[0]);
68+
});
69+
});
70+
}
71+
72+
assert_out_of_bound(r, up, lo);
73+
#else
74+
assert(!"SYCL_EXT_ONEAPI_NATIVE_MATH not supported");
75+
#endif
76+
}
77+
78+
int main() {
79+
80+
sycl::queue q;
81+
82+
const double tv[16] = {-2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0,
83+
-1.7, 1.7, -1.2, 1.2, -3.0, 3.0, -10.0, 10.0};
84+
const double tl[16] = {-0.97, -0.91, -0.77, -0.1, 0.95, 0.89, 0.75, -0.1,
85+
-0.94, 0.92, -0.84, 0.82, -1.0, 0.98, -1.10, 0.98};
86+
const double tu[16] = {-0.95, -0.89, -0.75, 0.1, 0.97, 0.91, 0.77, 0.1,
87+
-0.92, 0.94, -0.82, 0.84, -0.98, 1.00, -0.98, 1.10};
88+
89+
native_tanh_tester<float>(q, tv[0], tl[0], tu[0]);
90+
native_tanh_tester<sycl::float2>(q, {tv[0], tv[1]}, {tl[0], tl[1]},
91+
{tu[0], tu[1]});
92+
native_tanh_tester<sycl::float3>(
93+
q, {tv[0], tv[1], tv[2]}, {tl[0], tl[1], tl[2]}, {tu[0], tu[1], tu[2]});
94+
95+
native_tanh_tester<sycl::float4>(q, {tv[0], tv[1], tv[2], tv[3]},
96+
{tl[0], tl[1], tl[2], tl[3]},
97+
{tu[0], tu[1], tu[2], tu[3]});
98+
native_tanh_tester<sycl::marray<float, 3>>(q, {tv[0], tv[1], tv[2]},
99+
{tl[0], tl[1], tl[2]},
100+
{tu[0], tu[1], tu[2]});
101+
native_tanh_tester<sycl::marray<float, 4>>(q, {tv[0], tv[1], tv[2], tv[3]},
102+
{tl[0], tl[1], tl[2], tl[3]},
103+
{tu[0], tu[1], tu[2], tu[3]});
104+
native_tanh_tester<sycl::marray<float, 4>>(q, {tv[0], tv[1], tv[2], tv[3]},
105+
{tl[0], tl[1], tl[2], tl[3]},
106+
{tu[0], tu[1], tu[2], tu[3]});
107+
native_tanh_tester<sycl::float8>(
108+
q, {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7]},
109+
{tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7]},
110+
{tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7]});
111+
native_tanh_tester<sycl::float16>(
112+
q,
113+
{tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7], tv[8], tv[9],
114+
tv[10], tv[11], tv[12], tv[13], tv[14], tv[15]},
115+
{tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7], tl[8], tl[9],
116+
tl[10], tl[11], tl[12], tl[13], tl[14], tl[15]},
117+
{tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7], tu[8], tu[9],
118+
tu[10], tu[11], tu[12], tu[13], tu[14], tu[15]});
119+
120+
if (q.get_device().has(sycl::aspect::fp16)) {
121+
122+
native_tanh_tester<sycl::half>(q, tv[0], tl[0], tu[0]);
123+
native_tanh_tester<sycl::half2>(q, {tv[0], tv[1]}, {tl[0], tl[1]},
124+
{tu[0], tu[1]});
125+
native_tanh_tester<sycl::half3>(
126+
q, {tv[0], tv[1], tv[2]}, {tl[0], tl[1], tl[2]}, {tu[0], tu[1], tu[2]});
127+
native_tanh_tester<sycl::marray<sycl::half, 3>>(
128+
q, {tv[0], tv[1], tv[2]}, {tl[0], tl[1], tl[2]}, {tu[0], tu[1], tu[2]});
129+
native_tanh_tester<sycl::half4>(q, {tv[0], tv[1], tv[2], tv[3]},
130+
{tl[0], tl[1], tl[2], tl[3]},
131+
{tu[0], tu[1], tu[2], tu[3]});
132+
native_tanh_tester<sycl::marray<sycl::half, 4>>(q, {tv[0], tv[1], tv[2], tv[3]},
133+
{tl[0], tl[1], tl[2], tl[3]},
134+
{tu[0], tu[1], tu[2], tu[3]});
135+
native_tanh_tester<sycl::half8>(
136+
q, {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7]},
137+
{tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7]},
138+
{tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7]});
139+
native_tanh_tester<sycl::half16>(
140+
q,
141+
{tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7], tv[8], tv[9],
142+
tv[10], tv[11], tv[12], tv[13], tv[14], tv[15]},
143+
{tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7], tl[8], tl[9],
144+
tl[10], tl[11], tl[12], tl[13], tl[14], tl[15]},
145+
{tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7], tu[8], tu[9],
146+
tu[10], tu[11], tu[12], tu[13], tu[14], tu[15]});
147+
148+
const double ev[16] = {-2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0,
149+
-2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0};
150+
const double el[16] = {0.1, 0.34, 0.4, -0.9, 3.9, 2.7, 1.9, -0.9,
151+
0.1, 0.34, 0.4, -0.9, 3.9, 2.7, 1.9, -0.9};
152+
const double eu[16] = {0.3, 0.36, 0.6, 1.1, 4.1, 2.9, 2.1, 1.1,
153+
0.3, 0.36, 0.6, 1.1, 4.1, 2.9, 2.1, 1.1};
154+
155+
native_exp2_tester<sycl::half>(q, ev[0], el[0], eu[0]);
156+
native_exp2_tester<sycl::half2>(q, {ev[0], ev[1]}, {el[0], el[1]},
157+
{eu[0], eu[1]});
158+
native_exp2_tester<sycl::half3>(
159+
q, {ev[0], ev[1], ev[2]}, {el[0], el[1], el[2]}, {eu[0], eu[1], eu[2]});
160+
native_exp2_tester<sycl::half4>(q, {ev[0], ev[1], ev[2], ev[3]},
161+
{el[0], el[1], el[2], el[3]},
162+
{eu[0], eu[1], eu[2], eu[3]});
163+
native_exp2_tester<sycl::marray<sycl::half, 3>>(q, {ev[0], ev[1], ev[2]},
164+
{el[0], el[1], el[2]},
165+
{eu[0], eu[1], eu[2]});
166+
native_exp2_tester<sycl::marray<sycl::half, 4>>(q, {ev[0], ev[1], ev[2], ev[3]},
167+
{el[0], el[1], el[2], el[3]},
168+
{eu[0], eu[1], eu[2], eu[3]});
169+
native_exp2_tester<sycl::half8>(
170+
q, {ev[0], ev[1], ev[2], ev[3], ev[4], ev[5], ev[6], ev[7]},
171+
{el[0], el[1], el[2], el[3], el[4], el[5], el[6], el[7]},
172+
{eu[0], eu[1], eu[2], eu[3], eu[4], eu[5], eu[6], eu[7]});
173+
native_exp2_tester<sycl::half16>(
174+
q,
175+
{ev[0], ev[1], ev[2], ev[3], ev[4], ev[5], ev[6], ev[7], ev[8], ev[9],
176+
ev[10], ev[11], ev[12], ev[13], ev[14], ev[15]},
177+
{el[0], el[1], el[2], el[3], el[4], el[5], el[6], el[7], el[8], el[9],
178+
el[10], el[11], el[12], el[13], el[14], el[15]},
179+
{eu[0], eu[1], eu[2], eu[3], eu[4], eu[5], eu[6], eu[7], eu[8], eu[9],
180+
eu[10], eu[11], eu[12], eu[13], eu[14], eu[15]});
181+
}
182+
183+
return 0;
184+
}
Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
3+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
4+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
5+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
6+
7+
#include <sycl/sycl.hpp>
8+
9+
using namespace sycl;
10+
11+
template <typename T1, typename T2> class TypeHelper;
12+
13+
template <typename T> bool checkEqual(vec<T, 3> A, size_t B) {
14+
T TB = B;
15+
return A.x() == TB && A.y() == TB && A.z() == TB;
16+
}
17+
18+
template <typename T> bool checkEqual(vec<T, 4> A, size_t B) {
19+
T TB = B;
20+
return A.x() == TB && A.y() == TB && A.z() == TB && A.w() == TB;
21+
}
22+
23+
template <typename T, size_t N> bool checkEqual(marray<T, N> A, size_t B) {
24+
for (int i = 0; i < N; i++) {
25+
if (A[i] != B) {
26+
return false;
27+
}
28+
}
29+
return true;
30+
}
31+
32+
#define COMMA ,
33+
34+
#define HALF_PRECISION_OPERATOR(NAME) \
35+
template <typename T> \
36+
void half_precision_math_test_##NAME(queue &deviceQueue, T result, T input, \
37+
size_t ref) { \
38+
{ \
39+
buffer<T, 1> buffer1(&result, 1); \
40+
buffer<T, 1> buffer2(&input, 1); \
41+
deviceQueue.submit([&](handler &cgh) { \
42+
accessor<T, 1, access::mode::write, target::device> res_access( \
43+
buffer1, cgh); \
44+
accessor<T, 1, access::mode::write, target::device> input_access( \
45+
buffer2, cgh); \
46+
cgh.single_task<TypeHelper<class half_precision##NAME, T>>([=]() { \
47+
res_access[0] = sycl::half_precision::NAME(input_access[0]); \
48+
}); \
49+
}); \
50+
} \
51+
assert(checkEqual(result, ref)); \
52+
}
53+
54+
HALF_PRECISION_OPERATOR(sin)
55+
HALF_PRECISION_OPERATOR(tan)
56+
HALF_PRECISION_OPERATOR(cos)
57+
HALF_PRECISION_OPERATOR(exp)
58+
HALF_PRECISION_OPERATOR(exp2)
59+
HALF_PRECISION_OPERATOR(exp10)
60+
HALF_PRECISION_OPERATOR(log)
61+
HALF_PRECISION_OPERATOR(log2)
62+
HALF_PRECISION_OPERATOR(log10)
63+
HALF_PRECISION_OPERATOR(sqrt)
64+
HALF_PRECISION_OPERATOR(rsqrt)
65+
HALF_PRECISION_OPERATOR(recip)
66+
67+
#undef HALF_PRECISION_OPERATOR
68+
69+
#define HALF_PRECISION_OPERATOR_2(NAME) \
70+
template <typename T> \
71+
void half_precision_math_test_2_##NAME(queue &deviceQueue, T result, \
72+
T input1, T input2, size_t ref) { \
73+
{ \
74+
buffer<T, 1> buffer1(&result, 1); \
75+
buffer<T, 1> buffer2(&input1, 1); \
76+
buffer<T, 1> buffer3(&input2, 1); \
77+
deviceQueue.submit([&](handler &cgh) { \
78+
accessor<T, 1, access::mode::write, target::device> res_access( \
79+
buffer1, cgh); \
80+
accessor<T, 1, access::mode::write, target::device> input1_access( \
81+
buffer2, cgh); \
82+
accessor<T, 1, access::mode::write, target::device> input2_access( \
83+
buffer3, cgh); \
84+
cgh.single_task<TypeHelper<class half_precision2##NAME, T>>([=]() { \
85+
res_access[0] = \
86+
sycl::half_precision::NAME(input1_access[0], input2_access[0]); \
87+
}); \
88+
}); \
89+
} \
90+
assert(checkEqual(result, ref)); \
91+
}
92+
93+
HALF_PRECISION_OPERATOR_2(divide)
94+
HALF_PRECISION_OPERATOR_2(powr)
95+
96+
#undef HALF_PRECISION_OPERATOR_2
97+
98+
#define HALF_PRECISION_TESTS_3(TYPE) \
99+
half_precision_math_test_sin(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, \
100+
0); \
101+
half_precision_math_test_tan(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, \
102+
0); \
103+
half_precision_math_test_cos(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, \
104+
1); \
105+
half_precision_math_test_exp(deviceQueue, TYPE{-1, -1, -1}, TYPE{0, 0, 0}, \
106+
1); \
107+
half_precision_math_test_exp2(deviceQueue, TYPE{-1, -1, -1}, TYPE{2, 2, 2}, \
108+
4); \
109+
half_precision_math_test_exp10(deviceQueue, TYPE{-1, -1, -1}, TYPE{2, 2, 2}, \
110+
100); \
111+
half_precision_math_test_log(deviceQueue, TYPE{-1, -1, -1}, TYPE{1, 1, 1}, \
112+
0); \
113+
half_precision_math_test_log2(deviceQueue, TYPE{-1, -1, -1}, TYPE{4, 4, 4}, \
114+
2); \
115+
half_precision_math_test_log10(deviceQueue, TYPE{-1, -1, -1}, \
116+
TYPE{100, 100, 100}, 2); \
117+
half_precision_math_test_sqrt(deviceQueue, TYPE{-1, -1, -1}, TYPE{4, 4, 4}, \
118+
2); \
119+
half_precision_math_test_rsqrt(deviceQueue, TYPE{-1, -1, -1}, \
120+
TYPE{0.25, 0.25, 0.25}, 2); \
121+
half_precision_math_test_recip(deviceQueue, TYPE{-1, -1, -1}, \
122+
TYPE{0.25, 0.25, 0.25}, 4); \
123+
half_precision_math_test_2_powr(deviceQueue, TYPE{-1, -1, -1}, \
124+
TYPE{2, 2, 2}, TYPE{2, 2, 2}, 4); \
125+
half_precision_math_test_2_divide(deviceQueue, TYPE{-1, -1, -1}, \
126+
TYPE{4, 4, 4}, TYPE{2, 2, 2}, 2);
127+
128+
#define HALF_PRECISION_TESTS_4(TYPE) \
129+
half_precision_math_test_sin(deviceQueue, TYPE{-1, -1, -1, -1}, \
130+
TYPE{0, 0, 0, 0}, 0); \
131+
half_precision_math_test_tan(deviceQueue, TYPE{-1, -1, -1, -1}, \
132+
TYPE{0, 0, 0, 0}, 0); \
133+
half_precision_math_test_cos(deviceQueue, TYPE{-1, -1, -1, -1}, \
134+
TYPE{0, 0, 0, 0}, 1); \
135+
half_precision_math_test_exp(deviceQueue, TYPE{-1, -1, -1, -1}, \
136+
TYPE{0, 0, 0, 0}, 1); \
137+
half_precision_math_test_exp2(deviceQueue, TYPE{-1, -1, -1, -1}, \
138+
TYPE{2, 2, 2, 2}, 4); \
139+
half_precision_math_test_exp10(deviceQueue, TYPE{-1, -1, -1, -1}, \
140+
TYPE{2, 2, 2, 2}, 100); \
141+
half_precision_math_test_log(deviceQueue, TYPE{-1, -1, -1, -1}, \
142+
TYPE{1, 1, 1, 1}, 0); \
143+
half_precision_math_test_log2(deviceQueue, TYPE{-1, -1, -1, -1}, \
144+
TYPE{4, 4, 4, 4}, 2); \
145+
half_precision_math_test_log10(deviceQueue, TYPE{-1, -1, -1, -1}, \
146+
TYPE{100, 100, 100, 100}, 2); \
147+
half_precision_math_test_sqrt(deviceQueue, TYPE{-1, -1, -1, -1}, \
148+
TYPE{4, 4, 4, 4}, 2); \
149+
half_precision_math_test_rsqrt(deviceQueue, TYPE{-1, -1, -1, -1}, \
150+
TYPE{0.25, 0.25, 0.25, 0.25}, 2); \
151+
half_precision_math_test_recip(deviceQueue, TYPE{-1, -1, -1, -1}, \
152+
TYPE{0.25, 0.25, 0.25, 0.25}, 4); \
153+
half_precision_math_test_2_powr(deviceQueue, TYPE{-1, -1, -1, -1}, \
154+
TYPE{2, 2, 2, 2}, TYPE{2, 2, 2, 2}, 4); \
155+
half_precision_math_test_2_divide(deviceQueue, TYPE{-1, -1, -1, -1}, \
156+
TYPE{4, 4, 4, 4}, TYPE{2, 2, 2, 2}, 2);
157+
158+
int main() {
159+
queue deviceQueue;
160+
161+
HALF_PRECISION_TESTS_3(float3)
162+
HALF_PRECISION_TESTS_3(marray<float COMMA 3>)
163+
164+
HALF_PRECISION_TESTS_4(float4)
165+
HALF_PRECISION_TESTS_4(marray<float COMMA 4>)
166+
167+
std::cout << "Pass" << std::endl;
168+
return 0;
169+
}

0 commit comments

Comments
 (0)