Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit 14622ae

Browse files
authored
[SYCL] Fix warnings in reduction tests; make them more stable and verbose (#394)
* [SYCL] Fix warnings in reduction tests; make them more stable and verbose 1) The main motivation for this patch was the difficulties with reading the diagnostics printed by reduction tests in CI testing. The patch should fix that and also help reproducing the CI issues easier. 2) Disable the test reduction_nd_N_vars.cpp The test was supposed to be disabled by #389, but a wrong syntax was chosen there to do that. Fixed it here. Signed-off-by: Vyacheslav N Klochkov <[email protected]>
1 parent 3b399b5 commit 14622ae

28 files changed

+746
-987
lines changed

SYCL/Reduction/reduction_big_data.cpp

Lines changed: 22 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,16 @@
44
// RUN: %CPU_RUN_PLACEHOLDER %t.out
55

66
// RUNx: %HOST_RUN_PLACEHOLDER %t.out
7-
// TODO: Enable the test for HOST when it supports ONEAPI::reduce() and
7+
// TODO: Enable the test for HOST when it supports ext::oneapi::reduce() and
88
// barrier()
99

1010
// This test performs basic checks of parallel_for(nd_range, reduction, func)
1111
// where the bigger data size and/or non-uniform work-group sizes may cause
1212
// errors.
1313

1414
#include "reduction_utils.hpp"
15-
#include <CL/sycl.hpp>
15+
1616
#include <algorithm>
17-
#include <cassert>
1817

1918
using namespace cl::sycl;
2019

@@ -27,14 +26,14 @@ size_t getSafeMaxWGSize(size_t MaxWGSize, size_t MemSize, size_t OneElemSize) {
2726
return std::min(MaxNumElems / 2, MaxWGSize);
2827
}
2928

30-
template <typename KernelName, typename T, int Dim, class BinaryOperation>
31-
void test(queue &Q, T Identity) {
32-
device Device = Q.get_device();
29+
template <typename Name, typename T, int Dim, class BinaryOperation>
30+
int test(queue &Q, T Identity) {
31+
// It seems enough to test just one case - SYCL2020 reduction.
32+
constexpr bool IsSYCL2020 = true;
3333

34+
device Device = Q.get_device();
3435
std::size_t MaxWGSize = Device.get_info<info::device::max_work_group_size>();
3536
std::size_t LocalMemSize = Device.get_info<info::device::local_mem_size>();
36-
std::cout << "Detected device::max_work_group_size = " << MaxWGSize << "\n";
37-
std::cout << "Detected device::local_mem_size = " << LocalMemSize << "\n";
3837

3938
size_t WGSize = getSafeMaxWGSize(MaxWGSize, LocalMemSize, sizeof(T));
4039

@@ -44,15 +43,12 @@ void test(queue &Q, T Identity) {
4443
size_t NWorkItems = std::min(WGSize * MaxWGSize + 1, MaxGlobalRange);
4544

4645
size_t NWorkGroups = (NWorkItems - 1) / WGSize + 1;
47-
range<1> GlobalRange(NWorkGroups * WGSize);
48-
range<1> LocalRange(WGSize);
49-
nd_range<1> NDRange(GlobalRange, LocalRange);
50-
std::cout << "Running the test with: GlobalRange = " << (NWorkGroups * WGSize)
51-
<< ", LocalRange = " << WGSize << ", NWorkItems = " << NWorkItems
52-
<< "\n";
46+
nd_range<1> NDRange(range<1>{NWorkGroups * WGSize}, range<1>{WGSize});
47+
printTestLabel<T, BinaryOperation>(IsSYCL2020, NDRange);
5348

5449
buffer<T, 1> InBuf(NWorkItems);
5550
buffer<T, 1> OutBuf(1);
51+
(OutBuf.template get_access<access::mode::write>())[0] = Identity;
5652

5753
// Initialize.
5854
BinaryOperation BOp;
@@ -62,25 +58,17 @@ void test(queue &Q, T Identity) {
6258
// Compute.
6359
Q.submit([&](handler &CGH) {
6460
auto In = InBuf.template get_access<access::mode::read>(CGH);
65-
accessor<T, Dim, access::mode::discard_write, access::target::global_buffer>
66-
Out(OutBuf, CGH);
67-
CGH.parallel_for<KernelName>(NDRange, ONEAPI::reduction(Out, Identity, BOp),
68-
[=](nd_item<1> NDIt, auto &Sum) {
69-
if (NDIt.get_global_linear_id() < NWorkItems)
70-
Sum.combine(
71-
In[NDIt.get_global_linear_id()]);
72-
});
61+
CGH.parallel_for<Name>(NDRange, sycl::reduction(OutBuf, CGH, Identity, BOp),
62+
[=](nd_item<1> NDIt, auto &Sum) {
63+
if (NDIt.get_global_linear_id() < NWorkItems)
64+
Sum.combine(In[NDIt.get_global_linear_id()]);
65+
});
7366
});
7467

7568
// Check correctness.
7669
auto Out = OutBuf.template get_access<access::mode::read>();
7770
T ComputedOut = *(Out.get_pointer());
78-
if (ComputedOut != CorrectOut) {
79-
std::cout << "Computed value: " << ComputedOut
80-
<< ", Expected value: " << CorrectOut << "\n";
81-
assert(0 && "Wrong value.");
82-
}
83-
std::cout << "Test case passed\n\n";
71+
return checkResults(Q, IsSYCL2020, BOp, NDRange, ComputedOut, CorrectOut);
8472
}
8573

8674
template <typename T> struct BigCustomVec : public CustomVec<T> {
@@ -99,11 +87,13 @@ template <class T> struct BigCustomVecPlus {
9987

10088
int main() {
10189
queue Q;
102-
test<class Test1, float, 0, ONEAPI::maximum<>>(Q, getMinimumFPValue<float>());
90+
printDeviceInfo(Q);
91+
int NumErrors = test<class A1, float, 0, ext::oneapi::maximum<>>(
92+
Q, getMinimumFPValue<float>());
10393

10494
using BCV = BigCustomVec<long long>;
105-
test<class Test2, BCV, 1, BigCustomVecPlus<long long>>(Q, BCV(0));
95+
NumErrors += test<class A2, BCV, 1, BigCustomVecPlus<long long>>(Q, BCV(0));
10696

107-
std::cout << "Test passed\n";
108-
return 0;
97+
printFinalStatus(NumErrors);
98+
return NumErrors;
10999
}

SYCL/Reduction/reduction_nd_N_vars.cpp

Lines changed: 55 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -4,23 +4,13 @@
44
// RUN: %ACC_RUN_PLACEHOLDER %t.out
55

66
// TODO: The test irregularly reports incorrect.
7-
// UNSUPPORTED: TEMPORARY_DISABLED
7+
// REQUIRES: TEMPORARY_DISABLED
88

99
// This test checks handling of parallel_for() accepting nd_range and
1010
// two or more reductions.
1111

1212
#include "reduction_utils.hpp"
1313

14-
#include <CL/sycl.hpp>
15-
16-
#include <cassert>
17-
#include <cmath>
18-
#include <cstdint>
19-
#include <cstdio>
20-
#include <cstdlib>
21-
#include <numeric>
22-
#include <string>
23-
2414
using namespace cl::sycl;
2515

2616
template <typename... Ts> class KNameGroup;
@@ -29,22 +19,15 @@ template <typename T, bool B> class KName;
2919
constexpr access::mode RW = access::mode::read_write;
3020
constexpr access::mode DW = access::mode::discard_write;
3121

32-
template <typename T>
33-
bool cherkResultIsExpected(int TestCaseNum, T Expected, T Computed,
34-
bool IsSYCL2020) {
35-
bool Success;
36-
if (!std::is_floating_point<T>::value)
37-
Success = (Expected == Computed);
38-
else
39-
Success = std::abs((Expected / Computed) - 1) < 0.5;
40-
41-
if (!Success) {
42-
std::cerr << "Is SYCL2020 mode: " << IsSYCL2020 << std::endl;
43-
std::cerr << TestCaseNum << ": Expected value = " << Expected
44-
<< ", Computed value = " << Computed << "\n";
45-
}
46-
47-
return Success;
22+
template <typename RangeT>
23+
void printNVarsTestLabel(bool IsSYCL2020, const RangeT &Range,
24+
bool ToCERR = false) {
25+
std::ostream &OS = ToCERR ? std::cerr : std::cout;
26+
std::string Mode = IsSYCL2020 ? "SYCL2020" : "ONEAPI ";
27+
OS << (ToCERR ? "Error" : "Start") << ": Mode=" << Mode
28+
<< ", Range=" << Range;
29+
if (!ToCERR)
30+
OS << std::endl;
4831
}
4932

5033
// Returns 0 if the test case passed. Otherwise, some non-zero value.
@@ -58,6 +41,10 @@ int testOne(queue &Q, T1 IdentityVal1, T1 InitVal1, BinaryOperation1 BOp1,
5841
T3 IdentityVal3, T3 InitVal3, BinaryOperation3 BOp3,
5942
T4 IdentityVal4, T3 InitVal4, BinaryOperation4 BOp4,
6043
usm::alloc AllocType4, size_t NWorkItems, size_t WGSize) {
44+
45+
auto NDR = nd_range<1>{range<1>(NWorkItems), range<1>{WGSize}};
46+
printNVarsTestLabel<>(IsSYCL2020, NDR);
47+
6148
buffer<T1, 1> InBuf1(NWorkItems);
6249
buffer<T2, 1> InBuf2(NWorkItems);
6350
buffer<T3, 1> InBuf3(NWorkItems);
@@ -120,64 +107,33 @@ int testOne(queue &Q, T1 IdentityVal1, T1 InitVal1, BinaryOperation1 BOp1,
120107
}
121108
}
122109

123-
auto NDR = nd_range<1>{range<1>(NWorkItems), range<1>{WGSize}};
124-
if constexpr (IsSYCL2020) {
125-
Q.submit([&](handler &CGH) {
126-
auto In1 = InBuf1.template get_access<access::mode::read>(CGH);
127-
auto In2 = InBuf2.template get_access<access::mode::read>(CGH);
128-
auto In3 = InBuf3.template get_access<access::mode::read>(CGH);
129-
auto In4 = InBuf4.template get_access<access::mode::read>(CGH);
130-
131-
auto Redu1 = sycl::reduction(OutBuf1, CGH, IdentityVal1, BOp1,
132-
getPropertyList<Mode1>());
133-
auto Redu2 = sycl::reduction(OutBuf2, CGH, IdentityVal2, BOp2,
134-
getPropertyList<Mode2>());
135-
auto Redu3 = sycl::reduction(OutBuf3, CGH, IdentityVal3, BOp3,
136-
getPropertyList<Mode3>());
137-
auto Redu4 =
138-
sycl::reduction(Out4, IdentityVal4, BOp4, getPropertyList<Mode4>());
139-
140-
auto Lambda = [=](nd_item<1> NDIt, auto &Sum1, auto &Sum2, auto &Sum3,
141-
auto &Sum4) {
142-
size_t I = NDIt.get_global_id(0);
143-
Sum1.combine(In1[I]);
144-
Sum2.combine(In2[I]);
145-
Sum3.combine(In3[I]);
146-
Sum4.combine(In4[I]);
147-
};
148-
CGH.parallel_for<Name>(NDR, Redu1, Redu2, Redu3, Redu4, Lambda);
149-
}).wait();
150-
} else {
151-
// Test ONEAPI reductions
152-
Q.submit([&](handler &CGH) {
153-
auto In1 = InBuf1.template get_access<access::mode::read>(CGH);
154-
auto In2 = InBuf2.template get_access<access::mode::read>(CGH);
155-
auto In3 = InBuf3.template get_access<access::mode::read>(CGH);
156-
auto In4 = InBuf4.template get_access<access::mode::read>(CGH);
157-
158-
auto Out1 = OutBuf1.template get_access<Mode1>(CGH);
159-
auto Out2 = OutBuf2.template get_access<Mode2>(CGH);
160-
accessor<T3, 0, Mode3, access::target::global_buffer> Out3(OutBuf3, CGH);
161-
162-
auto Redu1 = ONEAPI::reduction(Out1, IdentityVal1, BOp1);
163-
auto Redu2 = ONEAPI::reduction(Out2, IdentityVal2, BOp2);
164-
auto Redu3 = ONEAPI::reduction(Out3, IdentityVal3, BOp3);
165-
auto Redu4 = ONEAPI::reduction(Out4, IdentityVal4, BOp4);
166-
167-
auto Lambda = [=](nd_item<1> NDIt, auto &Sum1, auto &Sum2, auto &Sum3,
168-
auto &Sum4) {
169-
size_t I = NDIt.get_global_id(0);
170-
Sum1.combine(In1[I]);
171-
Sum2.combine(In2[I]);
172-
Sum3.combine(In3[I]);
173-
Sum4.combine(In4[I]);
174-
};
175-
CGH.parallel_for<Name>(NDR, Redu1, Redu2, Redu3, Redu4, Lambda);
176-
}).wait();
177-
}
110+
Q.submit([&](handler &CGH) {
111+
auto In1 = InBuf1.template get_access<access::mode::read>(CGH);
112+
auto In2 = InBuf2.template get_access<access::mode::read>(CGH);
113+
auto In3 = InBuf3.template get_access<access::mode::read>(CGH);
114+
auto In4 = InBuf4.template get_access<access::mode::read>(CGH);
115+
116+
auto Redu1 =
117+
createReduction<IsSYCL2020, Mode1>(OutBuf1, CGH, IdentityVal1, BOp1);
118+
auto Redu2 =
119+
createReduction<IsSYCL2020, Mode2>(OutBuf2, CGH, IdentityVal2, BOp2);
120+
auto Redu3 =
121+
createReduction<IsSYCL2020, Mode3>(OutBuf3, CGH, IdentityVal3, BOp3);
122+
auto Redu4 = createReduction<IsSYCL2020, Mode4>(Out4, IdentityVal4, BOp4);
123+
124+
auto Lambda = [=](nd_item<1> NDIt, auto &Sum1, auto &Sum2, auto &Sum3,
125+
auto &Sum4) {
126+
size_t I = NDIt.get_global_id(0);
127+
Sum1.combine(In1[I]);
128+
Sum2.combine(In2[I]);
129+
Sum3.combine(In3[I]);
130+
Sum4.combine(In4[I]);
131+
};
132+
CGH.parallel_for<Name>(NDR, Redu1, Redu2, Redu3, Redu4, Lambda);
133+
}).wait();
178134

179135
// Check the results and free memory.
180-
int Error = 0;
136+
int NumErrors = 0;
181137
{
182138
auto Out1 = OutBuf1.template get_access<access::mode::read>();
183139
auto Out2 = OutBuf2.template get_access<access::mode::read>();
@@ -195,22 +151,23 @@ int testOne(queue &Q, T1 IdentityVal1, T1 InitVal1, BinaryOperation1 BOp1,
195151
Out4Val = *Out4;
196152
}
197153

198-
Error += cherkResultIsExpected(1, CorrectOut1, Out1[0], IsSYCL2020) ? 0 : 1;
199-
Error += cherkResultIsExpected(2, CorrectOut2, Out2[0], IsSYCL2020) ? 0 : 1;
200-
Error += cherkResultIsExpected(3, CorrectOut3, Out3[0], IsSYCL2020) ? 0 : 1;
201-
Error += cherkResultIsExpected(4, CorrectOut4, Out4Val, IsSYCL2020) ? 0 : 1;
154+
std::string AddInfo = "TestCase=";
155+
NumErrors += checkResults(Q, IsSYCL2020, BOp1, NDR, Out1[0], CorrectOut1,
156+
AddInfo + std::to_string(1));
157+
NumErrors += checkResults(Q, IsSYCL2020, BOp2, NDR, Out2[0], CorrectOut2,
158+
AddInfo + std::to_string(2));
159+
NumErrors += checkResults(Q, IsSYCL2020, BOp3, NDR, Out3[0], CorrectOut3,
160+
AddInfo + std::to_string(3));
161+
NumErrors += checkResults(Q, IsSYCL2020, BOp4, NDR, Out4Val, CorrectOut4,
162+
AddInfo + std::to_string(4));
202163
free(Out4, Q.get_context());
203164
}
204165

205-
if (Error)
206-
std::cerr << "The test failed for nd_range(" << NWorkItems << "," << WGSize
207-
<< ")\n\n";
208-
209-
return Error;
166+
return NumErrors;
210167
}
211168

212169
// Tests both implementations of reduction:
213-
// sycl::reduction and sycl::ONEAPI::reduction
170+
// sycl::reduction and sycl::ext::oneapi::reduction
214171
template <class Name, typename T1, access::mode Mode1, typename T2,
215172
access::mode Mode2, typename T3, access::mode Mode3, typename T4,
216173
access::mode Mode4, class BinaryOperation1, class BinaryOperation2,
@@ -236,18 +193,16 @@ int testBoth(queue &Q, T1 IdentityVal1, T1 InitVal1, BinaryOperation1 BOp1,
236193

237194
int main() {
238195
queue Q;
196+
printDeviceInfo(Q);
239197
int Error = testBoth<class Case1, float, DW, int, RW, short, RW, int, RW>(
240-
Q, 0, 1000, std::plus<float>{}, 0, 2000, std::plus<>{}, 0, 4000,
198+
Q, 0, 1000, std::plus<>{}, 0, 2000, std::plus<>{}, 0, 4000,
241199
std::bit_or<>{}, 0, 8000, std::bit_xor<>{}, usm::alloc::shared, 16, 16);
242200

243201
auto Add = [](auto x, auto y) { return (x + y); };
244202
Error += testBoth<class Case2, float, RW, int, RW, short, DW, int, DW>(
245-
Q, 0, 1000, std::plus<float>{}, 0, 2000, std::plus<>{}, 0, 4000, Add, 0,
246-
8000, std::plus<>{}, usm::alloc::device, 5 * (256 + 1), 5);
203+
Q, 0, 1000, std::plus<>{}, 0, 2000, std::plus<>{}, 0, 4000, Add, 0, 8000,
204+
std::plus<>{}, usm::alloc::device, 5 * (256 + 1), 5);
247205

248-
if (!Error)
249-
std::cout << "Test passed\n";
250-
else
251-
std::cerr << Error << " test-cases failed\n";
206+
printFinalStatus(Error);
252207
return Error;
253208
}

0 commit comments

Comments
 (0)