From 2dfd0305e6a9cb31080e96ae82a1dc88c9eff4c6 Mon Sep 17 00:00:00 2001 From: Vyacheslav N Klochkov Date: Tue, 27 Jul 2021 23:14:05 -0700 Subject: [PATCH 1/2] [SYCL] Fix warnings in reduction tests; make them more stable and verbose The main motivation for this patch was the difficulties with reading the diagnostics printed by reduction tests in CI testing. The patch should fix that and also help reproducing the CI issues easier. Signed-off-by: Vyacheslav N Klochkov --- SYCL/Reduction/reduction_big_data.cpp | 54 +++--- SYCL/Reduction/reduction_nd_N_vars.cpp | 153 ++++++---------- SYCL/Reduction/reduction_nd_conditional.cpp | 88 +++------ SYCL/Reduction/reduction_nd_ext_double.cpp | 2 +- SYCL/Reduction/reduction_nd_ext_half.cpp | 2 +- SYCL/Reduction/reduction_nd_ext_type.hpp | 110 +++--------- SYCL/Reduction/reduction_nd_lambda.cpp | 76 +++----- SYCL/Reduction/reduction_nd_range_scalar.hpp | 59 ++++++ SYCL/Reduction/reduction_nd_s0_dw.cpp | 109 +++-------- SYCL/Reduction/reduction_nd_s0_rw.cpp | 110 +++--------- SYCL/Reduction/reduction_nd_s1_dw.cpp | 104 +++-------- SYCL/Reduction/reduction_nd_s1_rw.cpp | 107 +++-------- SYCL/Reduction/reduction_placeholder.cpp | 76 ++++---- .../reduction_queue_parallel_for.cpp | 57 +++--- SYCL/Reduction/reduction_range_1d_s0_dw.cpp | 17 +- SYCL/Reduction/reduction_range_1d_s0_rw.cpp | 18 +- SYCL/Reduction/reduction_range_1d_s1_dw.cpp | 28 +-- SYCL/Reduction/reduction_range_1d_s1_rw.cpp | 22 +-- SYCL/Reduction/reduction_range_2d_s1_dw.cpp | 13 +- SYCL/Reduction/reduction_range_2d_s1_rw.cpp | 13 +- SYCL/Reduction/reduction_range_3d_s1_dw.cpp | 16 +- SYCL/Reduction/reduction_range_3d_s1_rw.cpp | 16 +- SYCL/Reduction/reduction_range_lambda.cpp | 40 +++-- SYCL/Reduction/reduction_range_scalar.hpp | 71 ++------ SYCL/Reduction/reduction_reducer_op_eq.cpp | 30 ++-- SYCL/Reduction/reduction_usm.cpp | 93 +++++----- SYCL/Reduction/reduction_usm_dw.cpp | 77 ++++---- SYCL/Reduction/reduction_utils.hpp | 170 ++++++++++++++++-- 28 files changed, 745 insertions(+), 986 deletions(-) create mode 100644 SYCL/Reduction/reduction_nd_range_scalar.hpp diff --git a/SYCL/Reduction/reduction_big_data.cpp b/SYCL/Reduction/reduction_big_data.cpp index 1c8d67068b..e7fd13d1a5 100644 --- a/SYCL/Reduction/reduction_big_data.cpp +++ b/SYCL/Reduction/reduction_big_data.cpp @@ -4,7 +4,7 @@ // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUNx: %HOST_RUN_PLACEHOLDER %t.out -// TODO: Enable the test for HOST when it supports ONEAPI::reduce() and +// TODO: Enable the test for HOST when it supports ext::oneapi::reduce() and // barrier() // This test performs basic checks of parallel_for(nd_range, reduction, func) @@ -12,9 +12,8 @@ // errors. #include "reduction_utils.hpp" -#include + #include -#include using namespace cl::sycl; @@ -27,14 +26,14 @@ size_t getSafeMaxWGSize(size_t MaxWGSize, size_t MemSize, size_t OneElemSize) { return std::min(MaxNumElems / 2, MaxWGSize); } -template -void test(queue &Q, T Identity) { - device Device = Q.get_device(); +template +int test(queue &Q, T Identity) { + // It seems enough to test just one case - SYCL2020 reduction. + constexpr bool IsSYCL2020 = true; + device Device = Q.get_device(); std::size_t MaxWGSize = Device.get_info(); std::size_t LocalMemSize = Device.get_info(); - std::cout << "Detected device::max_work_group_size = " << MaxWGSize << "\n"; - std::cout << "Detected device::local_mem_size = " << LocalMemSize << "\n"; size_t WGSize = getSafeMaxWGSize(MaxWGSize, LocalMemSize, sizeof(T)); @@ -44,15 +43,12 @@ void test(queue &Q, T Identity) { size_t NWorkItems = std::min(WGSize * MaxWGSize + 1, MaxGlobalRange); size_t NWorkGroups = (NWorkItems - 1) / WGSize + 1; - range<1> GlobalRange(NWorkGroups * WGSize); - range<1> LocalRange(WGSize); - nd_range<1> NDRange(GlobalRange, LocalRange); - std::cout << "Running the test with: GlobalRange = " << (NWorkGroups * WGSize) - << ", LocalRange = " << WGSize << ", NWorkItems = " << NWorkItems - << "\n"; + nd_range<1> NDRange(range<1>{NWorkGroups * WGSize}, range<1>{WGSize}); + printTestLabel(IsSYCL2020, NDRange); buffer InBuf(NWorkItems); buffer OutBuf(1); + (OutBuf.template get_access())[0] = Identity; // Initialize. BinaryOperation BOp; @@ -62,25 +58,17 @@ void test(queue &Q, T Identity) { // Compute. Q.submit([&](handler &CGH) { auto In = InBuf.template get_access(CGH); - accessor - Out(OutBuf, CGH); - CGH.parallel_for(NDRange, ONEAPI::reduction(Out, Identity, BOp), - [=](nd_item<1> NDIt, auto &Sum) { - if (NDIt.get_global_linear_id() < NWorkItems) - Sum.combine( - In[NDIt.get_global_linear_id()]); - }); + CGH.parallel_for(NDRange, sycl::reduction(OutBuf, CGH, Identity, BOp), + [=](nd_item<1> NDIt, auto &Sum) { + if (NDIt.get_global_linear_id() < NWorkItems) + Sum.combine(In[NDIt.get_global_linear_id()]); + }); }); // Check correctness. auto Out = OutBuf.template get_access(); T ComputedOut = *(Out.get_pointer()); - if (ComputedOut != CorrectOut) { - std::cout << "Computed value: " << ComputedOut - << ", Expected value: " << CorrectOut << "\n"; - assert(0 && "Wrong value."); - } - std::cout << "Test case passed\n\n"; + return checkResults(Q, IsSYCL2020, BOp, NDRange, ComputedOut, CorrectOut); } template struct BigCustomVec : public CustomVec { @@ -99,11 +87,13 @@ template struct BigCustomVecPlus { int main() { queue Q; - test>(Q, getMinimumFPValue()); + printDeviceInfo(Q); + int NumErrors = test>( + Q, getMinimumFPValue()); using BCV = BigCustomVec; - test>(Q, BCV(0)); + NumErrors += test>(Q, BCV(0)); - std::cout << "Test passed\n"; - return 0; + printFinalStatus(NumErrors); + return NumErrors; } diff --git a/SYCL/Reduction/reduction_nd_N_vars.cpp b/SYCL/Reduction/reduction_nd_N_vars.cpp index 1fc8cfadc7..11373a8e3b 100644 --- a/SYCL/Reduction/reduction_nd_N_vars.cpp +++ b/SYCL/Reduction/reduction_nd_N_vars.cpp @@ -11,16 +11,6 @@ #include "reduction_utils.hpp" -#include - -#include -#include -#include -#include -#include -#include -#include - using namespace cl::sycl; template class KNameGroup; @@ -29,22 +19,15 @@ template class KName; constexpr access::mode RW = access::mode::read_write; constexpr access::mode DW = access::mode::discard_write; -template -bool cherkResultIsExpected(int TestCaseNum, T Expected, T Computed, - bool IsSYCL2020) { - bool Success; - if (!std::is_floating_point::value) - Success = (Expected == Computed); - else - Success = std::abs((Expected / Computed) - 1) < 0.5; - - if (!Success) { - std::cerr << "Is SYCL2020 mode: " << IsSYCL2020 << std::endl; - std::cerr << TestCaseNum << ": Expected value = " << Expected - << ", Computed value = " << Computed << "\n"; - } - - return Success; +template +void printNVarsTestLabel(bool IsSYCL2020, const RangeT &Range, + bool ToCERR = false) { + std::ostream &OS = ToCERR ? std::cerr : std::cout; + std::string Mode = IsSYCL2020 ? "SYCL2020" : "ONEAPI "; + OS << (ToCERR ? "Error" : "Start") << ": Mode=" << Mode + << ", Range=" << Range; + if (!ToCERR) + OS << std::endl; } // Returns 0 if the test case passed. Otherwise, some non-zero value. @@ -58,6 +41,10 @@ int testOne(queue &Q, T1 IdentityVal1, T1 InitVal1, BinaryOperation1 BOp1, T3 IdentityVal3, T3 InitVal3, BinaryOperation3 BOp3, T4 IdentityVal4, T3 InitVal4, BinaryOperation4 BOp4, usm::alloc AllocType4, size_t NWorkItems, size_t WGSize) { + + auto NDR = nd_range<1>{range<1>(NWorkItems), range<1>{WGSize}}; + printNVarsTestLabel<>(IsSYCL2020, NDR); + buffer InBuf1(NWorkItems); buffer InBuf2(NWorkItems); buffer InBuf3(NWorkItems); @@ -120,64 +107,33 @@ int testOne(queue &Q, T1 IdentityVal1, T1 InitVal1, BinaryOperation1 BOp1, } } - auto NDR = nd_range<1>{range<1>(NWorkItems), range<1>{WGSize}}; - if constexpr (IsSYCL2020) { - Q.submit([&](handler &CGH) { - auto In1 = InBuf1.template get_access(CGH); - auto In2 = InBuf2.template get_access(CGH); - auto In3 = InBuf3.template get_access(CGH); - auto In4 = InBuf4.template get_access(CGH); - - auto Redu1 = sycl::reduction(OutBuf1, CGH, IdentityVal1, BOp1, - getPropertyList()); - auto Redu2 = sycl::reduction(OutBuf2, CGH, IdentityVal2, BOp2, - getPropertyList()); - auto Redu3 = sycl::reduction(OutBuf3, CGH, IdentityVal3, BOp3, - getPropertyList()); - auto Redu4 = - sycl::reduction(Out4, IdentityVal4, BOp4, getPropertyList()); - - auto Lambda = [=](nd_item<1> NDIt, auto &Sum1, auto &Sum2, auto &Sum3, - auto &Sum4) { - size_t I = NDIt.get_global_id(0); - Sum1.combine(In1[I]); - Sum2.combine(In2[I]); - Sum3.combine(In3[I]); - Sum4.combine(In4[I]); - }; - CGH.parallel_for(NDR, Redu1, Redu2, Redu3, Redu4, Lambda); - }).wait(); - } else { - // Test ONEAPI reductions - Q.submit([&](handler &CGH) { - auto In1 = InBuf1.template get_access(CGH); - auto In2 = InBuf2.template get_access(CGH); - auto In3 = InBuf3.template get_access(CGH); - auto In4 = InBuf4.template get_access(CGH); - - auto Out1 = OutBuf1.template get_access(CGH); - auto Out2 = OutBuf2.template get_access(CGH); - accessor Out3(OutBuf3, CGH); - - auto Redu1 = ONEAPI::reduction(Out1, IdentityVal1, BOp1); - auto Redu2 = ONEAPI::reduction(Out2, IdentityVal2, BOp2); - auto Redu3 = ONEAPI::reduction(Out3, IdentityVal3, BOp3); - auto Redu4 = ONEAPI::reduction(Out4, IdentityVal4, BOp4); - - auto Lambda = [=](nd_item<1> NDIt, auto &Sum1, auto &Sum2, auto &Sum3, - auto &Sum4) { - size_t I = NDIt.get_global_id(0); - Sum1.combine(In1[I]); - Sum2.combine(In2[I]); - Sum3.combine(In3[I]); - Sum4.combine(In4[I]); - }; - CGH.parallel_for(NDR, Redu1, Redu2, Redu3, Redu4, Lambda); - }).wait(); - } + Q.submit([&](handler &CGH) { + auto In1 = InBuf1.template get_access(CGH); + auto In2 = InBuf2.template get_access(CGH); + auto In3 = InBuf3.template get_access(CGH); + auto In4 = InBuf4.template get_access(CGH); + + auto Redu1 = + createReduction(OutBuf1, CGH, IdentityVal1, BOp1); + auto Redu2 = + createReduction(OutBuf2, CGH, IdentityVal2, BOp2); + auto Redu3 = + createReduction(OutBuf3, CGH, IdentityVal3, BOp3); + auto Redu4 = createReduction(Out4, IdentityVal4, BOp4); + + auto Lambda = [=](nd_item<1> NDIt, auto &Sum1, auto &Sum2, auto &Sum3, + auto &Sum4) { + size_t I = NDIt.get_global_id(0); + Sum1.combine(In1[I]); + Sum2.combine(In2[I]); + Sum3.combine(In3[I]); + Sum4.combine(In4[I]); + }; + CGH.parallel_for(NDR, Redu1, Redu2, Redu3, Redu4, Lambda); + }).wait(); // Check the results and free memory. - int Error = 0; + int NumErrors = 0; { auto Out1 = OutBuf1.template get_access(); auto Out2 = OutBuf2.template get_access(); @@ -195,22 +151,23 @@ int testOne(queue &Q, T1 IdentityVal1, T1 InitVal1, BinaryOperation1 BOp1, Out4Val = *Out4; } - Error += cherkResultIsExpected(1, CorrectOut1, Out1[0], IsSYCL2020) ? 0 : 1; - Error += cherkResultIsExpected(2, CorrectOut2, Out2[0], IsSYCL2020) ? 0 : 1; - Error += cherkResultIsExpected(3, CorrectOut3, Out3[0], IsSYCL2020) ? 0 : 1; - Error += cherkResultIsExpected(4, CorrectOut4, Out4Val, IsSYCL2020) ? 0 : 1; + std::string AddInfo = "TestCase="; + NumErrors += checkResults(Q, IsSYCL2020, BOp1, NDR, Out1[0], CorrectOut1, + AddInfo + std::to_string(1)); + NumErrors += checkResults(Q, IsSYCL2020, BOp2, NDR, Out2[0], CorrectOut2, + AddInfo + std::to_string(2)); + NumErrors += checkResults(Q, IsSYCL2020, BOp3, NDR, Out3[0], CorrectOut3, + AddInfo + std::to_string(3)); + NumErrors += checkResults(Q, IsSYCL2020, BOp4, NDR, Out4Val, CorrectOut4, + AddInfo + std::to_string(4)); free(Out4, Q.get_context()); } - if (Error) - std::cerr << "The test failed for nd_range(" << NWorkItems << "," << WGSize - << ")\n\n"; - - return Error; + return NumErrors; } // Tests both implementations of reduction: -// sycl::reduction and sycl::ONEAPI::reduction +// sycl::reduction and sycl::ext::oneapi::reduction template ( - Q, 0, 1000, std::plus{}, 0, 2000, std::plus<>{}, 0, 4000, + Q, 0, 1000, std::plus<>{}, 0, 2000, std::plus<>{}, 0, 4000, std::bit_or<>{}, 0, 8000, std::bit_xor<>{}, usm::alloc::shared, 16, 16); auto Add = [](auto x, auto y) { return (x + y); }; Error += testBoth( - Q, 0, 1000, std::plus{}, 0, 2000, std::plus<>{}, 0, 4000, Add, 0, - 8000, std::plus<>{}, usm::alloc::device, 5 * (256 + 1), 5); + Q, 0, 1000, std::plus<>{}, 0, 2000, std::plus<>{}, 0, 4000, Add, 0, 8000, + std::plus<>{}, usm::alloc::device, 5 * (256 + 1), 5); - if (!Error) - std::cout << "Test passed\n"; - else - std::cerr << Error << " test-cases failed\n"; + printFinalStatus(Error); return Error; } diff --git a/SYCL/Reduction/reduction_nd_conditional.cpp b/SYCL/Reduction/reduction_nd_conditional.cpp index 48c2cf56f3..d1bf4c0f05 100644 --- a/SYCL/Reduction/reduction_nd_conditional.cpp +++ b/SYCL/Reduction/reduction_nd_conditional.cpp @@ -7,8 +7,7 @@ // This test performs basic checks of parallel_for(nd_range, reduction, func) // with reduction and conditional increment of the reduction variable. -#include -#include +#include "reduction_utils.hpp" using namespace cl::sycl; @@ -18,10 +17,7 @@ void initInputData(buffer &InBuf, T &ExpectedOut, T Identity, ExpectedOut = Identity; auto In = InBuf.template get_access(); for (int I = 0; I < N; ++I) { - if (std::is_same>::value) - In[I] = 1 + (((I % 37) == 0) ? 1 : 0); - else - In[I] = I + 1 + 1.1; + In[I] = I + 1; if (I < 2) ExpectedOut = BOp(ExpectedOut, 99); @@ -32,79 +28,51 @@ void initInputData(buffer &InBuf, T &ExpectedOut, T Identity, } }; -template struct Vec { - Vec() : X(0), Y(0) {} - Vec(T X, T Y) : X(X), Y(Y) {} - Vec(T V) : X(V), Y(V) {} - bool operator==(const Vec &P) const { return P.X == X && P.Y == Y; } - bool operator!=(const Vec &P) const { return !(*this == P); } - T X; - T Y; -}; -template bool operator==(const Vec &A, const Vec &B) { - return A.X == B.X && A.Y == B.Y; -} -template -std::ostream &operator<<(std::ostream &OS, const Vec &P) { - return OS << "(" << P.X << ", " << P.Y << ")"; -} - -template struct VecPlus { - using P = Vec; - P operator()(const P &A, const P &B) const { return P(A.X + B.X, A.Y + B.Y); } -}; - -template -void test(queue &Q, T Identity, size_t WGSize, size_t NWItems) { +template +int test(queue &Q, T Identity, size_t WGSize, size_t NWItems) { buffer InBuf(NWItems); buffer OutBuf(1); + nd_range<1> NDRange(range<1>{NWItems}, range<1>{WGSize}); + printTestLabel(true /*SYCL2020*/, NDRange); + // Initialize. BinaryOperation BOp; T CorrectOut; initInputData(InBuf, CorrectOut, Identity, BOp, NWItems); + (OutBuf.template get_access())[0] = Identity; // Compute. Q.submit([&](handler &CGH) { auto In = InBuf.template get_access(CGH); - accessor - Out(OutBuf, CGH); - auto Redu = ONEAPI::reduction(Out, Identity, BOp); - - range<1> GlobalRange(NWItems); - range<1> LocalRange(WGSize); - nd_range<1> NDRange(GlobalRange, LocalRange); - CGH.parallel_for( - NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) { - size_t I = NDIt.get_global_linear_id(); - if (I < 2) - Sum.combine(T(99)); - else if (I % 3) - Sum.combine(In[I]); - else - ; // do nothing. - }); + auto Redu = sycl::reduction(OutBuf, CGH, Identity, BOp); + CGH.parallel_for(NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) { + size_t I = NDIt.get_global_linear_id(); + if (I < 2) + Sum.combine(T(99)); + else if (I % 3) + Sum.combine(In[I]); + else + ; // do nothing. + }); }); // Check correctness. auto Out = OutBuf.template get_access(); T ComputedOut = *(Out.get_pointer()); - if (ComputedOut != CorrectOut) { - std::cout << "NWItems = " << NWItems << ", WGSize = " << WGSize << "\n"; - std::cout << "Computed value: " << ComputedOut - << ", Expected value: " << CorrectOut << "\n"; - assert(0 && "Wrong value."); - } + return checkResults(Q, true /*SYCL2020*/, BOp, NDRange, ComputedOut, + CorrectOut); } int main() { queue Q; - test>(Q, 0, 2, 2); - test>(Q, 0, 7, 7); - test>(Q, 0, 2, 64); - test>(Q, 0, 16, 256); + printDeviceInfo(Q); + + int NumErrors = 0; + NumErrors += test>(Q, 0, 2, 2); + NumErrors += test>(Q, 0, 7, 7); + NumErrors += test>(Q, 0, 1, 1025); - std::cout << "Test passed\n"; - return 0; + printFinalStatus(NumErrors); + return NumErrors; } diff --git a/SYCL/Reduction/reduction_nd_ext_double.cpp b/SYCL/Reduction/reduction_nd_ext_double.cpp index aacbce14cb..88b375ca16 100644 --- a/SYCL/Reduction/reduction_nd_ext_double.cpp +++ b/SYCL/Reduction/reduction_nd_ext_double.cpp @@ -11,4 +11,4 @@ #include "reduction_nd_ext_type.hpp" -int main() { return runTests("cl_khr_fp64"); } +int main() { return runTests(sycl::aspect::fp64); } diff --git a/SYCL/Reduction/reduction_nd_ext_half.cpp b/SYCL/Reduction/reduction_nd_ext_half.cpp index 7bb9f6f543..e0e88c05ca 100644 --- a/SYCL/Reduction/reduction_nd_ext_half.cpp +++ b/SYCL/Reduction/reduction_nd_ext_half.cpp @@ -16,4 +16,4 @@ #include "reduction_nd_ext_type.hpp" -int main() { return runTests("cl_khr_fp16"); } +int main() { return runTests(sycl::aspect::fp16); } diff --git a/SYCL/Reduction/reduction_nd_ext_type.hpp b/SYCL/Reduction/reduction_nd_ext_type.hpp index 0ee925a615..f30768c428 100644 --- a/SYCL/Reduction/reduction_nd_ext_type.hpp +++ b/SYCL/Reduction/reduction_nd_ext_type.hpp @@ -2,103 +2,47 @@ // with types that may require additional runtime checks for extensions // supported by the device, e.g. 'half' or 'double' -#include "reduction_utils.hpp" -#include -#include +#include "reduction_nd_range_scalar.hpp" using namespace cl::sycl; -template class KName; -constexpr access::mode RW = access::mode::read_write; -constexpr access::mode DW = access::mode::discard_write; +int NumErrors = 0; -template -void test(queue &Q, T Identity, T Init, size_t WGSize, size_t NWItems) { - buffer InBuf(NWItems); - buffer OutBuf(1); - - // Initialize. - BinaryOperation BOp; - T CorrectOut; - initInputData(InBuf, CorrectOut, Identity, BOp, NWItems); - if (Mode == access::mode::read_write) - CorrectOut = BOp(CorrectOut, Init); - - (OutBuf.template get_access())[0] = Init; - - // Compute. +template +void tests(queue &Q, T Identity, T Init, BinaryOperation BOp, size_t WGSize, + size_t NWItems) { nd_range<1> NDRange(range<1>{NWItems}, range<1>{WGSize}); - if constexpr (IsSYCL2020Mode) { - Q.submit([&](handler &CGH) { - auto In = InBuf.template get_access(CGH); - auto Redu = - sycl::reduction(OutBuf, CGH, Identity, BOp, getPropertyList()); - CGH.parallel_for(NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) { - Sum.combine(In[NDIt.get_global_linear_id()]); - }); - }); - } else { - Q.submit([&](handler &CGH) { - auto In = InBuf.template get_access(CGH); - accessor Out(OutBuf, CGH); - auto Redu = ONEAPI::reduction(Out, Identity, BOp); - - CGH.parallel_for(NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) { - Sum.combine(In[NDIt.get_global_linear_id()]); - }); - }); - } - - // Check correctness. - auto Out = OutBuf.template get_access(); - T ComputedOut = *(Out.get_pointer()); - T MaxDiff = 4 * std::numeric_limits::epsilon() * - std::fabs(ComputedOut + CorrectOut); - T CompDiff = std::fabs(static_cast(ComputedOut - CorrectOut)); - if (CompDiff > MaxDiff) { - std::cerr << "NWItems = " << NWItems << ", WGSize = " << WGSize << "\n"; - std::cerr << "Computed value: " << ComputedOut - << ", Expected value: " << CorrectOut << ", MaxDiff = " << MaxDiff - << ", ComputedDiff = " << CompDiff << std::endl; - assert(0 && "Wrong value."); - } -} - -template -void testBoth(queue &Q, T Identity, T Init, size_t WGSize, size_t NWItems) { - test, false, T, Dim, Mode, BinaryOperation>( - Q, Identity, Init, WGSize, NWItems); - - test, true, T, Dim, Mode, BinaryOperation>( - Q, Identity, Init, WGSize, NWItems); + NumErrors += testBoth(Q, Identity, Init, BOp, NDRange); } -template int runTests(const string_class &ExtensionName) { +template int runTests(sycl::aspect ExtAspect) { queue Q; + printDeviceInfo(Q); device D = Q.get_device(); - if (!D.is_host() && !D.has_extension(ExtensionName)) { + if (!D.is_host() && !D.has(ExtAspect)) { std::cout << "Test skipped\n"; return 0; } - testBoth>(Q, 1, 77, 4, 4); + constexpr access::mode RW = access::mode::read_write; + constexpr access::mode DW = access::mode::discard_write; + + tests(Q, 1, 77, std::multiplies{}, 4, 4); + tests(Q, 1, 77, std::multiplies{}, 4, 8); + + tests(Q, 0, 77, std::plus{}, 4, 32); + tests(Q, 0, 33, std::plus{}, 3, 3 * 5); - testBoth>(Q, 0, 77, 4, 64); - testBoth>(Q, 0, 33, 3, 3 * 5); + tests(Q, getMaximumFPValue(), -10.0, + ext::oneapi::minimum{}, 7, 7 * 512); + tests(Q, getMaximumFPValue(), 99.0, + ext::oneapi::minimum{}, 7, 7); - testBoth>(Q, getMaximumFPValue(), - -10.0, 7, 7); - testBoth>(Q, getMaximumFPValue(), - 99.0, 7, 7); - testBoth>(Q, getMaximumFPValue(), - -99.0, 3, 3); + tests(Q, getMinimumFPValue(), 99.0, + ext::oneapi::maximum<>{}, 3, 3); + tests(Q, getMinimumFPValue(), 99.0, + ext::oneapi::maximum<>{}, 7, 7 * 5); - testBoth>(Q, getMinimumFPValue(), - 99.0, 3, 3); - testBoth>(Q, getMinimumFPValue(), - 99.0, 7, 7 * 5); - std::cout << "Test passed\n"; - return 0; + printFinalStatus(NumErrors); + return NumErrors; } diff --git a/SYCL/Reduction/reduction_nd_lambda.cpp b/SYCL/Reduction/reduction_nd_lambda.cpp index 0fe7b29779..334385b65b 100644 --- a/SYCL/Reduction/reduction_nd_lambda.cpp +++ b/SYCL/Reduction/reduction_nd_lambda.cpp @@ -6,72 +6,42 @@ // This test performs basic checks of parallel_for(nd_range, reduction, lambda) -#include "reduction_utils.hpp" -#include -#include +#include "reduction_nd_range_scalar.hpp" using namespace cl::sycl; -// Note that this function is created only to test that if the accessor -// object passed to ONEAPI::reduction is destroyed right after -// ONEAPI::reduction creation, then the reduction still works properly, -// i.e. it holds a COPY of user's accessor. -template -auto createReduction(sycl::buffer Buffer, handler &CGH, T Identity, - BOpT BOp) { - auto Acc = Buffer.template get_access(CGH); - return ONEAPI::reduction(Acc, Identity, BOp); -} - -template -void test(queue &Q, T Identity, BinaryOperation BOp, size_t WGSize, - size_t NWItems) { - buffer InBuf(NWItems); - buffer OutBuf(1); - - // Initialize. - T CorrectOut; - initInputData(InBuf, CorrectOut, Identity, BOp, NWItems); - - // Compute. - Q.submit([&](handler &CGH) { - auto In = InBuf.template get_access(CGH); - auto Redu = createReduction(OutBuf, CGH, Identity, BOp); +template class MName; - nd_range<1> NDRange(range<1>{NWItems}, range<1>{WGSize}); - CGH.parallel_for(NDRange, Redu, - [=](nd_item<1> NDIt, auto &Sum) { - Sum.combine(In[NDIt.get_global_linear_id()]); - }); - }); +int NumErrors = 0; - // Check correctness. - auto Out = OutBuf.template get_access(); - T ComputedOut = *(Out.get_pointer()); - if (ComputedOut != CorrectOut) { - std::cerr << "NWItems = " << NWItems << ", WGSize = " << WGSize << "\n"; - std::cerr << "Computed value: " << ComputedOut - << ", Expected value: " << CorrectOut << "\n"; - assert(0 && "Wrong value."); - } +template +void tests(queue &Q, T Identity, T Init, BinaryOperation BOp, size_t WGSize, + size_t NWItems) { + constexpr access::mode DW = access::mode::discard_write; + constexpr access::mode RW = access::mode::read_write; + nd_range<1> NDRange(range<1>{NWItems}, range<1>{WGSize}); + NumErrors += testBoth, DW>(Q, Identity, Init, BOp, NDRange); + NumErrors += testBoth, RW>(Q, Identity, Init, BOp, NDRange); } int main() { queue Q; - test( - Q, 0, [](auto x, auto y) { return (x + y); }, 1, 1024); - test( - Q, 0, [](auto x, auto y) { return (x * y); }, 8, 32); + printDeviceInfo(Q); + tests( + Q, 0, 9, [](auto x, auto y) { return (x + y); }, 1, 1024); + tests( + Q, 1, 2, [](auto x, auto y) { return (x * y); }, 8, 16); // Check with CUSTOM type. - test>( - Q, CustomVec(0), - [](auto x, auto y) { + using CV = CustomVec; + tests( + Q, CV(0), CV(2021), + [](auto X, auto Y) { CustomVecPlus BOp; - return BOp(x, y); + return BOp(X, Y); }, 4, 64); - std::cout << "Test passed\n"; - return 0; + printFinalStatus(NumErrors); + return NumErrors; } diff --git a/SYCL/Reduction/reduction_nd_range_scalar.hpp b/SYCL/Reduction/reduction_nd_range_scalar.hpp new file mode 100644 index 0000000000..cdfdc897dc --- /dev/null +++ b/SYCL/Reduction/reduction_nd_range_scalar.hpp @@ -0,0 +1,59 @@ +// This test performs basic checks of parallel_for(range, reduction, func) +// with reductions initialized with 1-dimensional buffer/accessor +// accessing a scalar holding the reduction result. + +#include "reduction_utils.hpp" + +using namespace cl::sycl; + +template class KName; + +template +int test(queue &Q, T Identity, T Init, BinaryOperation BOp, + const nd_range &Range) { + printTestLabel(IsSYCL2020, Range); + + // Skip the test for such big arrays now. + constexpr size_t TwoGB = 2LL * 1024 * 1024 * 1024; + range GlobalRange = Range.get_global_range(); + if (GlobalRange.size() > TwoGB) + return 0; + + buffer InBuf(GlobalRange); + buffer OutBuf(1); + + // Initialize. + T CorrectOut; + initInputData(InBuf, CorrectOut, Identity, BOp, GlobalRange); + if constexpr (Mode == access::mode::read_write) { + CorrectOut = BOp(CorrectOut, Init); + } + + // The value assigned here must be discarded (if IsReadWrite is true). + // Verify that it is really discarded and assign some value. + (OutBuf.template get_access())[0] = Init; + + // Compute. + Q.submit([&](handler &CGH) { + auto In = InBuf.template get_access(CGH); + auto Redu = + createReduction(OutBuf, CGH, Identity, BOp); + CGH.parallel_for(Range, Redu, [=](nd_item NDIt, auto &Sum) { + Sum.combine(In[NDIt.get_global_id()]); + }); + }); + + // Check correctness. + auto Out = OutBuf.template get_access(); + T ComputedOut = *(Out.get_pointer()); + return checkResults(Q, IsSYCL2020, BOp, Range, ComputedOut, CorrectOut); +} + +template +int testBoth(queue &Q, T Identity, T Init, BinaryOperation BOp, + const nd_range &Range) { + return test, false, Mode>(Q, Identity, Init, BOp, Range) + + test, true, Mode>(Q, Identity, Init, BOp, Range); +} diff --git a/SYCL/Reduction/reduction_nd_s0_dw.cpp b/SYCL/Reduction/reduction_nd_s0_dw.cpp index 43ce0f6566..6dda0ca243 100644 --- a/SYCL/Reduction/reduction_nd_s0_dw.cpp +++ b/SYCL/Reduction/reduction_nd_s0_dw.cpp @@ -6,104 +6,49 @@ // This test performs basic checks of parallel_for(nd_range, reduction, func) // with reductions initialized with 0-dimensional discard_write accessor. -#include "reduction_utils.hpp" -#include -#include +#include "reduction_nd_range_scalar.hpp" using namespace cl::sycl; -// This allocator is needed only for the purpose of testing buffers -// with allocator that is not same_as sycl::buffer_allocator. -struct CustomAllocator : public sycl::buffer_allocator {}; - -template class KName; - -template -void test(queue &Q, T Identity, T Init, size_t WGSize, size_t NWItems) { - buffer InBuf(NWItems); - buffer OutBuf(1); - - // Initialize. - BinaryOperation BOp; - T CorrectOut; - initInputData(InBuf, CorrectOut, Identity, BOp, NWItems); - - // This assignment is discarded, but is still needed to verify - // that is discarded. - (OutBuf.template get_access())[0] = Init; - - // Compute. - - nd_range<1> NDRange(range<1>{NWItems}, range<1>{WGSize}); - if constexpr (IsSYCL2020Mode) { - Q.submit([&](handler &CGH) { - auto In = InBuf.template get_access(CGH); - property_list PropList(property::reduction::initialize_to_identity{}); - auto Redu = sycl::reduction(OutBuf, CGH, Identity, BOp, PropList); - - CGH.parallel_for(NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) { - Sum.combine(In[NDIt.get_global_linear_id()]); - }); - }); - } else { - Q.submit([&](handler &CGH) { - auto In = InBuf.template get_access(CGH); - accessor - Out(OutBuf, CGH); - auto Redu = ONEAPI::reduction(Out, Identity, BOp); - - CGH.parallel_for(NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) { - Sum.combine(In[NDIt.get_global_linear_id()]); - }); - }); - } - - // Check correctness. - auto Out = OutBuf.template get_access(); - T ComputedOut = *(Out.get_pointer()); - if (ComputedOut != CorrectOut) { - std::cout << "NWItems = " << NWItems << ", WGSize = " << WGSize << "\n"; - std::cout << "Computed value: " << ComputedOut - << ", Expected value: " << CorrectOut << "\n"; - assert(0 && "Wrong value."); - } -} +int NumErrors = 0; template -void testBoth(queue &Q, T Identity, T Init, size_t WGSize, size_t NWItems) { - test, false, T, BinaryOperation>(Q, Identity, Init, WGSize, - NWItems); - test, true, T, BinaryOperation>(Q, Identity, Init, WGSize, - NWItems); +void tests(queue &Q, T Identity, T Init, BinaryOperation BOp, size_t WGSize, + size_t NWItems) { + constexpr access::mode DW = access::mode::discard_write; + nd_range<1> NDRange(range<1>{NWItems}, range<1>{WGSize}); + NumErrors += + test(Q, Identity, Init, BOp, NDRange); } int main() { queue Q; + printDeviceInfo(Q); // Check some non power-of-two work-group sizes. - testBoth>(Q, 0, 99, 1, 7); - testBoth>(Q, 0, 99, 49, 49 * 5); + tests(Q, 0, 99, std::plus{}, 1, 7); + tests(Q, 0, 99, std::plus{}, 49, 49 * 5); // Try some power-of-two work-group sizes. - testBoth>(Q, 0, 99, 1, 32); - testBoth>(Q, 1, 99, 4, 32); - testBoth>(Q, 0, 99, 16, 256); - testBoth>(Q, ~0, 99, 32, 256); - testBoth>( - Q, (std::numeric_limits::max)(), -99, 64, 256); - testBoth>( - Q, (std::numeric_limits::min)(), 99, 128, 256); - testBoth>(Q, 0, 99, 256, 256); + tests(Q, 0, 99, std::plus<>{}, 1, 32); + tests(Q, 1, 99, std::multiplies<>{}, 4, 32); + tests(Q, 0, 99, std::bit_xor<>{}, 16, 256); + tests(Q, ~0, 99, std::bit_and<>{}, 32, 256); + tests(Q, (std::numeric_limits::max)(), -99, + ext::oneapi::minimum<>{}, 64, 256); + tests(Q, (std::numeric_limits::min)(), 99, + ext::oneapi::maximum<>{}, 128, 256); + tests(Q, 0, 99, std::plus<>{}, 256, 256); // Check with various types. - testBoth>(Q, 1, 99, 8, 256); - testBoth>(Q, 0x7fff, -99, 8, 256); - testBoth>(Q, 0, 99, 8, 256); + tests(Q, 1, 99, std::multiplies<>{}, 8, 16); + tests(Q, 0x7fff, -99, ext::oneapi::minimum<>{}, 8, 256); + tests(Q, 0, 99, ext::oneapi::maximum<>{}, 8, 256); // Check with CUSTOM type. - testBoth, CustomVecPlus>( - Q, CustomVec(0), CustomVec(99), 8, 256); + using CV = CustomVec; + tests(Q, CV(0), CV(99), CustomVecPlus{}, 8, 256); - std::cout << "Test passed\n"; - return 0; + printFinalStatus(NumErrors); + return NumErrors; } diff --git a/SYCL/Reduction/reduction_nd_s0_rw.cpp b/SYCL/Reduction/reduction_nd_s0_rw.cpp index a5983f019f..a8b75b06a1 100644 --- a/SYCL/Reduction/reduction_nd_s0_rw.cpp +++ b/SYCL/Reduction/reduction_nd_s0_rw.cpp @@ -6,104 +6,50 @@ // This test performs basic checks of parallel_for(nd_range, reduction, func) // with reductions initialized with 0-dimensional read_write accessor. -#include "reduction_utils.hpp" -#include -#include +#include "reduction_nd_range_scalar.hpp" using namespace cl::sycl; -// This allocator is needed only for the purpose of testing buffers -// with allocator that is not same_as sycl::buffer_allocator. -struct CustomAllocator : public sycl::buffer_allocator {}; - -template class KName; - -template -void test(queue &Q, T Identity, T Init, size_t WGSize, size_t NWItems) { - buffer InBuf(NWItems); - buffer OutBuf(1); - - // Initialize. - BinaryOperation BOp; - T CorrectOut; - initInputData(InBuf, CorrectOut, Identity, BOp, NWItems); - - // The final reduction sum after running parallel_for() must include - // the original value it was initialized with before the parallel_for(). - CorrectOut = BOp(CorrectOut, Init); - (OutBuf.template get_access())[0] = Init; - - // Compute. - nd_range<1> NDRange(range<1>{NWItems}, range<1>{WGSize}); - if constexpr (IsSYCL2020Mode) { - Q.submit([&](handler &CGH) { - auto In = InBuf.template get_access(CGH); - auto Redu = sycl::reduction(OutBuf, CGH, Identity, BOp); - - CGH.parallel_for(NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) { - Sum.combine(In[NDIt.get_global_linear_id()]); - }); - }); - } else { - Q.submit([&](handler &CGH) { - auto In = InBuf.template get_access(CGH); - accessor - Out(OutBuf, CGH); - auto Redu = ONEAPI::reduction(Out, Identity, BOp); - - CGH.parallel_for(NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) { - Sum.combine(In[NDIt.get_global_linear_id()]); - }); - }); - } - - // Check correctness. - auto Out = OutBuf.template get_access(); - T ComputedOut = *(Out.get_pointer()); - if (ComputedOut != CorrectOut) { - std::cout << "NWItems = " << NWItems << ", WGSize = " << WGSize << "\n"; - std::cout << "Computed value: " << ComputedOut - << ", Expected value: " << CorrectOut << "\n"; - assert(0 && "Wrong value."); - } -} +int NumErrors = 0; template -void testBoth(queue &Q, T Identity, T Init, size_t WGSize, size_t NWItems) { - test, false, T, BinaryOperation>(Q, Identity, Init, WGSize, - NWItems); - test, true, T, BinaryOperation>(Q, Identity, Init, WGSize, - NWItems); +void tests(queue &Q, T Identity, T Init, BinaryOperation BOp, size_t WGSize, + size_t NWItems) { + constexpr access::mode RW = access::mode::read_write; + nd_range<1> NDRange(range<1>{NWItems}, range<1>{WGSize}); + NumErrors += + test(Q, Identity, Init, BOp, NDRange); } int main() { queue Q; + printDeviceInfo(Q); // Check non power-of-two work-group sizes. - testBoth>(Q, 0, 99, 1, 7); - testBoth>(Q, 0, -99, 49, 49 * 5); + tests(Q, 0, 99, std::plus{}, 1, 7); + tests(Q, 0, -99, std::plus{}, 49, 49 * 5); // Try some power-of-two work-group sizes. - testBoth>(Q, 0, 99, 2, 32); - testBoth>(Q, 0, 199, 32, 32); - testBoth>(Q, 0, 299, 128, 256); - testBoth>(Q, 0, 399, 256, 256); + tests(Q, 0, 99, std::plus<>{}, 2, 32); + tests(Q, 0, 199, std::plus<>{}, 32, 32); + tests(Q, 0, 299, std::plus<>{}, 128, 256); + tests(Q, 0, 399, std::plus<>{}, 256, 256); // Check with various operations and types. - testBoth>(Q, 1, 2, 8, 256); - testBoth>(Q, 1, 1.2, 8, 32); - testBoth>(Q, 0, 0x3400, 4, 32); - testBoth>(Q, 0, 0x12340000, 4, 32); - testBoth>(Q, ~0, ~0, 4, 16); - testBoth>( - Q, (std::numeric_limits::max)(), -99, 8, 256); - testBoth>( - Q, (std::numeric_limits::min)(), 99, 8, 256); + tests(Q, 1, 2, std::multiplies<>{}, 8, 8); + tests(Q, 1, 1.2, std::multiplies<>{}, 8, 16); + tests(Q, 0, 0x3400, std::bit_or<>{}, 4, 32); + tests(Q, 0, 0x12340000, std::bit_xor<>{}, 4, 32); + tests(Q, ~0, ~0, std::bit_and<>{}, 4, 16); + tests(Q, (std::numeric_limits::max)(), -99, + ext::oneapi::minimum<>{}, 8, 256); + tests(Q, (std::numeric_limits::min)(), 99, + ext::oneapi::maximum<>{}, 8, 256); // Check with CUSTOM type. - testBoth, CustomVecPlus>( - Q, CustomVec(0), CustomVec(199), 8, 256); + using CV = CustomVec; + tests(Q, CV(0), CV(99), CustomVecPlus{}, 8, 256); - std::cout << "Test passed\n"; - return 0; + printFinalStatus(NumErrors); + return NumErrors; } diff --git a/SYCL/Reduction/reduction_nd_s1_dw.cpp b/SYCL/Reduction/reduction_nd_s1_dw.cpp index 8745d054e3..3afbfb141b 100644 --- a/SYCL/Reduction/reduction_nd_s1_dw.cpp +++ b/SYCL/Reduction/reduction_nd_s1_dw.cpp @@ -8,100 +8,48 @@ // with reductions initialized with 1-dimensional discard_write accessor // accessing 1 element buffer. -#include "reduction_utils.hpp" -#include -#include +#include "reduction_nd_range_scalar.hpp" using namespace cl::sycl; -template class KName; - -template -void test(queue &Q, T Identity, T Init, size_t WGSize, size_t NWItems) { - buffer InBuf(NWItems); - buffer OutBuf(1); - - // Initialize. - BinaryOperation BOp; - T CorrectOut; - initInputData(InBuf, CorrectOut, Identity, BOp, NWItems); - - // This assignment is discarded, but is still needed to verify - // that is discarded. - (OutBuf.template get_access())[0] = Init; - - // Compute. - nd_range<1> NDRange(range<1>{NWItems}, range<1>{WGSize}); - if constexpr (IsSYCL2020Mode) { - Q.submit([&](handler &CGH) { - auto In = InBuf.template get_access(CGH); - property_list PropList(property::reduction::initialize_to_identity{}); - auto Redu = sycl::reduction(OutBuf, CGH, Identity, BOp, PropList); - - CGH.parallel_for(NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) { - Sum.combine(In[NDIt.get_global_linear_id()]); - }); - }); - } else { - Q.submit([&](handler &CGH) { - auto In = InBuf.template get_access(CGH); - accessor - Out(OutBuf, CGH); - auto Redu = ONEAPI::reduction(Out, Identity, BOp); - - CGH.parallel_for(NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) { - Sum.combine(In[NDIt.get_global_linear_id()]); - }); - }); - } - - // Check correctness. - auto Out = OutBuf.template get_access(); - T ComputedOut = *(Out.get_pointer()); - if (ComputedOut != CorrectOut) { - std::cout << "NWItems = " << NWItems << ", WGSize = " << WGSize << "\n"; - std::cout << "Computed value: " << ComputedOut - << ", Expected value: " << CorrectOut << "\n"; - assert(0 && "Wrong value."); - } -} +int NumErrors = 0; template -void testBoth(queue &Q, T Identity, T Init, size_t WGSize, size_t NWItems) { - test, false, T, BinaryOperation>(Q, Identity, Init, WGSize, - NWItems); - test, true, T, BinaryOperation>(Q, Identity, Init, WGSize, - NWItems); +void tests(queue &Q, T Identity, T Init, BinaryOperation BOp, size_t WGSize, + size_t NWItems) { + constexpr access::mode DW = access::mode::discard_write; + nd_range<1> NDRange(range<1>{NWItems}, range<1>{WGSize}); + NumErrors += testBoth(Q, Identity, Init, BOp, NDRange); } int main() { queue Q; + printDeviceInfo(Q); // Check some non power-of-two work-group sizes. - testBoth>(Q, 0, 99, 1, 7); - testBoth>(Q, 0, 99, 49, 49 * 5); + tests(Q, 0, 99, std::plus{}, 1, 7); + tests(Q, 0, 99, std::plus{}, 49, 49 * 5); // Try some power-of-two work-group sizes. - testBoth>(Q, 0, 99, 1, 32); - testBoth>(Q, 1, 99, 4, 64); - testBoth>(Q, 0, 99, 8, 128); - testBoth>(Q, 0, 99, 16, 256); - testBoth>(Q, ~0, 99, 32, 256); - testBoth>( - Q, (std::numeric_limits::max)(), -99, 64, 256); - testBoth>( - Q, (std::numeric_limits::min)(), 99, 128, 256); - testBoth>(Q, 0, 99, 256, 256); + tests(Q, 0, 99, std::plus<>{}, 1, 32); + tests(Q, 0, 99, std::bit_or<>{}, 8, 128); + tests(Q, 0, 99, std::bit_xor<>{}, 16, 256); + tests(Q, ~0, 99, std::bit_and<>{}, 32, 256); + tests(Q, (std::numeric_limits::max)(), -99, + ext::oneapi::minimum<>{}, 64, 256); + tests(Q, (std::numeric_limits::min)(), 99, + ext::oneapi::maximum<>{}, 128, 256); + tests(Q, 0, 99, std::plus<>{}, 256, 256); // Check with various types. - testBoth>(Q, 1, 99, 8, 256); - testBoth>(Q, 0x7fff, -99, 8, 256); - testBoth>(Q, 0, 99, 8, 256); + tests(Q, 1, 99, std::multiplies<>{}, 8, 24); + tests(Q, 0x7fff, -99, ext::oneapi::minimum<>{}, 8, 256); + tests(Q, 0, 99, ext::oneapi::maximum<>{}, 8, 256); // Check with CUSTOM type. - testBoth, CustomVecPlus>( - Q, CustomVec(0), CustomVec(99), 8, 256); + using CV = CustomVec; + tests(Q, CV(0), CV(99), CustomVecPlus{}, 8, 256); - std::cout << "Test passed\n"; - return 0; + printFinalStatus(NumErrors); + return NumErrors; } diff --git a/SYCL/Reduction/reduction_nd_s1_rw.cpp b/SYCL/Reduction/reduction_nd_s1_rw.cpp index 2a40149555..654da943ec 100644 --- a/SYCL/Reduction/reduction_nd_s1_rw.cpp +++ b/SYCL/Reduction/reduction_nd_s1_rw.cpp @@ -7,104 +7,43 @@ // with reductions initialized with 1-dimensional read_write accessor // accessing 1 element buffer. -#include "reduction_utils.hpp" -#include -#include +#include "reduction_nd_range_scalar.hpp" using namespace cl::sycl; -// This allocator is needed only for the purpose of testing buffers -// with allocator that is not same_as sycl::buffer_allocator. -struct CustomAllocator : public sycl::buffer_allocator {}; - -template class KName; - -template -void test(queue &Q, T Identity, T Init, size_t WGSize, size_t NWItems) { - buffer InBuf(NWItems); - buffer OutBuf(1); - - // Initialize. - BinaryOperation BOp; - T CorrectOut; - initInputData(InBuf, CorrectOut, Identity, BOp, NWItems); - - // The final reduction sum after running parallel_for() must include - // the original value it was initialized with before the parallel_for(). - CorrectOut = BOp(CorrectOut, Init); - (OutBuf.template get_access())[0] = Init; - - // Compute. - nd_range<1> NDRange(range<1>{NWItems}, range<1>{WGSize}); - if constexpr (IsSYCL2020Mode) { - Q.submit([&](handler &CGH) { - auto In = InBuf.template get_access(CGH); - auto Redu = sycl::reduction(OutBuf, CGH, Identity, BOp); - - CGH.parallel_for(NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) { - Sum.combine(In[NDIt.get_global_linear_id()]); - }); - }); - } else { - Q.submit([&](handler &CGH) { - auto In = InBuf.template get_access(CGH); - accessor - Out(OutBuf, CGH); - auto Redu = ONEAPI::reduction(Out, Identity, BOp); - - CGH.parallel_for(NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) { - Sum.combine(In[NDIt.get_global_linear_id()]); - }); - }); - } - - // Check correctness. - auto Out = OutBuf.template get_access(); - T ComputedOut = *(Out.get_pointer()); - if (ComputedOut != CorrectOut) { - std::cout << "NWItems = " << NWItems << ", WGSize = " << WGSize << "\n"; - std::cout << "Computed value: " << ComputedOut - << ", Expected value: " << CorrectOut << "\n"; - assert(0 && "Wrong value."); - } -} +int NumErrors = 0; template -void testBoth(queue &Q, T Identity, T Init, size_t WGSize, size_t NWItems) { - test, false, T, BinaryOperation>(Q, Identity, Init, WGSize, - NWItems); - test, true, T, BinaryOperation>(Q, Identity, Init, WGSize, - NWItems); +void tests(queue &Q, T Identity, T Init, BinaryOperation BOp, size_t WGSize, + size_t NWItems) { + constexpr access::mode RW = access::mode::read_write; + nd_range<1> NDRange(range<1>{NWItems}, range<1>{WGSize}); + NumErrors += testBoth(Q, Identity, Init, BOp, NDRange); } int main() { queue Q; + printDeviceInfo(Q); // Check non power-of-two work-group sizes. - testBoth>(Q, 0, 99, 1, 7); - testBoth>(Q, 0, -99, 49, 49 * 5); - - // Try some power-of-two work-group sizes. - testBoth>(Q, 0, 99, 2, 32); - testBoth>(Q, 0, 199, 32, 128); - testBoth>(Q, 0, 299, 128, 128); - testBoth>(Q, 0, 399, 256, 256); + tests(Q, 0, 99, std::plus{}, 1, 7); + tests(Q, 0, -99, std::plus{}, 49, 49 * 5); // Check with various operations and types. - testBoth>(Q, 1, 2, 8, 256); - testBoth>(Q, 1, 1.2, 8, 16); - testBoth>(Q, 0, 0x3400, 4, 32); - testBoth>(Q, 0, 0x12340000, 2, 16); - testBoth>(Q, ~0, ~0, 4, 16); - testBoth>( - Q, (std::numeric_limits::max)(), 99, 8, 256); - testBoth>( - Q, (std::numeric_limits::min)(), -99, 8, 256); + tests(Q, 0, 99, std::plus<>{}, 1, 32); + tests(Q, 0, 99, std::bit_or<>{}, 8, 128); + tests(Q, 0, 99, std::bit_xor<>{}, 16, 256); + tests(Q, ~0, 99, std::bit_and<>{}, 32, 256); + tests(Q, (std::numeric_limits::max)(), -99, + ext::oneapi::minimum<>{}, 64, 256); + tests(Q, (std::numeric_limits::min)(), 99, + ext::oneapi::maximum<>{}, 128, 256); + tests(Q, 0, 99, std::plus<>{}, 256, 256); // Check with CUSTOM type. - testBoth, CustomVecPlus>( - Q, CustomVec(0), CustomVec(-199), 8, 256); + using CV = CustomVec; + tests(Q, CV(0), CV(-199), CustomVecPlus{}, 8, 256); - std::cout << "Test passed\n"; - return 0; + printFinalStatus(NumErrors); + return NumErrors; } diff --git a/SYCL/Reduction/reduction_placeholder.cpp b/SYCL/Reduction/reduction_placeholder.cpp index e47b040497..f71bea0255 100644 --- a/SYCL/Reduction/reduction_placeholder.cpp +++ b/SYCL/Reduction/reduction_placeholder.cpp @@ -4,23 +4,24 @@ // RUN: %ACC_RUN_PLACEHOLDER %t.out // RUNx: %HOST_RUN_PLACEHOLDER %t.out -// TODO: Enable the test for HOST when it supports ONEAPI::reduce() and +// TODO: Enable the test for HOST when it supports ext::oneapi::reduce() and // barrier() // This test performs basic checks of parallel_for(nd_range, reduction, func) // with reductions initialized with a placeholder accessor. #include "reduction_utils.hpp" -#include -#include using namespace cl::sycl; -template class KNGroup; +template class KName; + +template +int test(queue &Q, T Identity, T Init, size_t WGSize, size_t NWItems) { + nd_range<1> NDRange(range<1>{NWItems}, range<1>{WGSize}); + printTestLabel(false, NDRange); -template -void testOneCase(queue &Q, T Identity, T Init, size_t WGSize, size_t NWItems) { // Initialize. T CorrectOut; BinaryOperation BOp; @@ -39,57 +40,54 @@ void testOneCase(queue &Q, T Identity, T Init, size_t WGSize, size_t NWItems) { Q.submit([&](handler &CGH) { auto In = InBuf.template get_access(CGH); CGH.require(Out); - auto Redu = ONEAPI::reduction(Out, Identity, BinaryOperation()); - range<1> GlobalRange(NWItems); - range<1> LocalRange(WGSize); - nd_range<1> NDRange(GlobalRange, LocalRange); - CGH.parallel_for( - NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) { - Sum.combine(In[NDIt.get_global_linear_id()]); - }); + auto Redu = ext::oneapi::reduction(Out, Identity, BOp); + CGH.parallel_for(NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) { + Sum.combine(In[NDIt.get_global_linear_id()]); + }); }); Q.wait(); // Check correctness. T ReduVar = (OutBuf.template get_access())[0]; - if (ReduVar != CorrectOut) { - std::cout << "NWItems = " << NWItems << ", WGSize = " << WGSize << "\n"; - std::cout << "Computed value: " << ReduVar - << ", Expected value: " << CorrectOut << "\n"; - assert(0 && "Wrong value."); - } + return checkResults(Q, false /*SYCL2020*/, BOp, NDRange, ReduVar, CorrectOut); } -template -void test(queue &Q, T Identity, T Init, size_t WGSize, size_t NWItems) { - testOneCase, T, Dim, BinaryOperation, - access::mode::read_write>(Q, Identity, Init, WGSize, NWItems); - testOneCase, T, Dim, BinaryOperation, - access::mode::discard_write>(Q, Identity, Init, WGSize, NWItems); +int NumErrors = 0; + +template +void tests(queue &Q, T Identity, T Init, size_t WGSize, size_t NWItems) { + constexpr access::mode DW = access::mode::discard_write; + constexpr access::mode RW = access::mode::read_write; + + NumErrors += test, T, 0, BinaryOperation, DW>( + Q, Identity, Init, WGSize, NWItems); + NumErrors += test, T, 1, BinaryOperation, DW>( + Q, Identity, Init, WGSize, NWItems); + + NumErrors += test, T, 0, BinaryOperation, RW>( + Q, Identity, Init, WGSize, NWItems); + NumErrors += test, T, 1, BinaryOperation, RW>( + Q, Identity, Init, WGSize, NWItems); } int main() { queue Q; + printDeviceInfo(Q); + // fast atomics and fast reduce - test>(Q, 0, 77, 49, 49 * 5); - test>(Q, 0, -77, 8, 8); + tests>(Q, 0, 77, 49, 49 * 5); // fast atomics - test>(Q, 0, 233, 7, 7 * 3); - test>(Q, 0, 177, 4, 128); + tests>(Q, 0, 233, 7, 7 * 3); // fast reduce - test>( + tests>( Q, getMaximumFPValue(), -5.0, 5, 5 * 7); - test>( - Q, getMinimumFPValue(), -5.0, 4, 128); // generic algorithm - test>(Q, 1, 2, 7, 7 * 5); - test>(Q, 1, 3, 8, 16); - test, 0, CustomVecPlus>( + tests, CustomVecPlus>( Q, CustomVec(0), CustomVec(4), 8, 8 * 3); - std::cout << "Test passed\n"; - return 0; + printFinalStatus(NumErrors); + return NumErrors; } diff --git a/SYCL/Reduction/reduction_queue_parallel_for.cpp b/SYCL/Reduction/reduction_queue_parallel_for.cpp index f143378fb5..ec3bfdd723 100644 --- a/SYCL/Reduction/reduction_queue_parallel_for.cpp +++ b/SYCL/Reduction/reduction_queue_parallel_for.cpp @@ -4,47 +4,38 @@ // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUNx: %HOST_RUN_PLACEHOLDER %t.out -// TODO: Enable the test for HOST when it supports ONEAPI::reduce() and +// TODO: Enable the test for HOST when it supports ext::oneapi::reduce() and // barrier() // This test only checks that the method queue::parallel_for() accepting // reduction, can be properly translated into queue::submit + parallel_for(). -#include +#include "reduction_utils.hpp" + using namespace sycl; template class KName; -template int test(queue &Q) { - const size_t NElems = 1024; - const size_t WGSize = 256; +template +int test(queue &Q, T Identity, size_t WGSize, size_t NElems) { + nd_range<1> NDRange(range<1>{NElems}, range<1>{WGSize}); + printTestLabel(IsSYCL2020, NDRange); - int *Data = malloc_shared(NElems, Q); + T *Data = malloc_shared(NElems, Q); for (int I = 0; I < NElems; I++) Data[I] = I; - int *Sum = malloc_shared(1, Q); - *Sum = 0; - - if constexpr (IsSYCL2020Mode) { - Q.parallel_for( - nd_range<1>{NElems, WGSize}, sycl::reduction(Sum, std::plus<>()), - [=](nd_item<1> It, auto &Sum) { Sum += Data[It.get_global_id(0)]; }) - .wait(); - } else { - Q.parallel_for( - nd_range<1>{NElems, WGSize}, ONEAPI::reduction(Sum, ONEAPI::plus<>()), - [=](nd_item<1> It, auto &Sum) { Sum += Data[It.get_global_id(0)]; }) - .wait(); - } - - int ExpectedSum = (NElems - 1) * NElems / 2; - int Error = 0; - if (*Sum != ExpectedSum) { - std::cerr << "Error: Expected = " << ExpectedSum << ", Computed = " << *Sum - << std::endl; - Error = 1; - } + T *Sum = malloc_shared(1, Q); + *Sum = Identity; + + BinaryOperation BOp; + auto Redu = createReduction(Sum, BOp); + Q.parallel_for(NDRange, Redu, [=](nd_item<1> It, auto &Sum) { + Sum += Data[It.get_global_id(0)]; + }).wait(); + + T ExpectedSum = (NElems - 1) * NElems / 2; + int Error = checkResults(Q, IsSYCL2020, BOp, NDRange, *Sum, ExpectedSum); free(Data, Q); free(Sum, Q); @@ -53,7 +44,11 @@ template int test(queue &Q) { int main() { queue Q; - int Error = test, true>(Q); - Error += test, false>(Q); - return Error; + printDeviceInfo(Q); + + int NumErrors = test>(Q, 0, 16, 32); + NumErrors += test>(Q, 0, 7, 14); + + printFinalStatus(NumErrors); + return NumErrors; } diff --git a/SYCL/Reduction/reduction_range_1d_s0_dw.cpp b/SYCL/Reduction/reduction_range_1d_s0_dw.cpp index a000c9cfe7..e84d764ced 100644 --- a/SYCL/Reduction/reduction_range_1d_s0_dw.cpp +++ b/SYCL/Reduction/reduction_range_1d_s0_dw.cpp @@ -14,10 +14,13 @@ using namespace cl::sycl; +int NumErrors = 0; + template void tests(queue &Q, T Identity, T Init, BinaryOperation BOp, size_t NWItems) { constexpr access::mode DW = access::mode::discard_write; - test(Q, Identity, Init, BOp, range<1>{NWItems}); + NumErrors += + test(Q, Identity, Init, BOp, range<1>{NWItems}); } int main() { @@ -38,19 +41,17 @@ int main() { tests(Q, ~0, ~0, std::bit_and<>{}, 8); tests(Q, 0, 0x12340000, std::bit_xor<>{}, 16); tests(Q, 0, 0x3400, std::bit_or<>{}, MaxWGSize * 4); - tests(Q, 1, 2, std::multiplies<>{}, 256); - tests(Q, 1, 3, std::multiplies<>{}, MaxWGSize + 1); - tests(Q, (std::numeric_limits::max)(), -99, + tests(Q, 1, 2, std::multiplies<>{}, 31); + tests(Q, (std::numeric_limits::max)(), -99, ext::oneapi::minimum<>{}, MaxWGSize * 2); - tests(Q, (std::numeric_limits::min)(), 99, + tests(Q, (std::numeric_limits::min)(), 99, ext::oneapi::maximum<>{}, 8); - tests(Q, 1, 99, std::multiplies<>{}, 37); // Check with CUSTOM type. using CV = CustomVec; tests(Q, CV(0), CV(99), CustomVecPlus{}, 64); tests(Q, CV(0), CV(99), CustomVecPlus{}, MaxWGSize * 3); - std::cout << "Test passed\n"; - return 0; + printFinalStatus(NumErrors); + return NumErrors; } diff --git a/SYCL/Reduction/reduction_range_1d_s0_rw.cpp b/SYCL/Reduction/reduction_range_1d_s0_rw.cpp index 8e23e9e26a..bc1e2befed 100644 --- a/SYCL/Reduction/reduction_range_1d_s0_rw.cpp +++ b/SYCL/Reduction/reduction_range_1d_s0_rw.cpp @@ -14,10 +14,13 @@ using namespace cl::sycl; +int NumErrors = 0; + template void tests(queue &Q, T Identity, T Init, BinaryOperation BOp, size_t NWItems) { constexpr access::mode RW = access::mode::read_write; - test(Q, Identity, Init, BOp, range<1>{NWItems}); + NumErrors += + test(Q, Identity, Init, BOp, range<1>{NWItems}); } int main() { @@ -38,13 +41,12 @@ int main() { tests(Q, ~0, ~0, std::bit_and<>{}, 8); tests(Q, 0, 0x12340000, std::bit_xor<>{}, 16); tests(Q, 0, 0x3400, std::bit_or<>{}, MaxWGSize * 4); - tests(Q, 1, 2, std::multiplies<>{}, 256); - tests(Q, 1, 3, std::multiplies<>{}, MaxWGSize + 1); - tests(Q, (std::numeric_limits::max)(), -99, + tests(Q, 1, 2, std::multiplies<>{}, 17); + tests(Q, (std::numeric_limits::max)(), -99, ext::oneapi::minimum<>{}, MaxWGSize * 2); - tests(Q, (std::numeric_limits::min)(), 99, + tests(Q, (std::numeric_limits::min)(), 99, ext::oneapi::maximum<>{}, 8); - tests(Q, 1, 99, std::multiplies<>{}, MaxWGSize); + tests(Q, 1, 99, std::multiplies<>{}, 16); // Check with CUSTOM type. tests(Q, CustomVec(0), CustomVec(99), @@ -52,6 +54,6 @@ int main() { tests(Q, CustomVec(0), CustomVec(99), CustomVecPlus{}, MaxWGSize * 3); - std::cout << "Test passed\n"; - return 0; + printFinalStatus(NumErrors); + return NumErrors; } diff --git a/SYCL/Reduction/reduction_range_1d_s1_dw.cpp b/SYCL/Reduction/reduction_range_1d_s1_dw.cpp index e2924a0050..ff5c01898e 100644 --- a/SYCL/Reduction/reduction_range_1d_s1_dw.cpp +++ b/SYCL/Reduction/reduction_range_1d_s1_dw.cpp @@ -14,10 +14,12 @@ using namespace cl::sycl; +int NumErrors = 0; + template void tests(queue &Q, T Identity, T Init, BinaryOperation BOp, size_t NWItems) { constexpr access::mode DW = access::mode::discard_write; - testBoth(Q, Identity, Init, BOp, range<1>{NWItems}); + NumErrors += testBoth(Q, Identity, Init, BOp, range<1>{NWItems}); } int main() { @@ -27,24 +29,22 @@ int main() { Q.get_device().get_info(); // Fast-reduce and Fast-atomics. Try various range types/sizes. - tests(Q, 0, 99, std::plus<>{}, 1); - tests(Q, 0, 99, std::plus<>{}, 2); - tests(Q, 0, 99, std::plus<>{}, 7); - tests(Q, 0, 99, std::plus<>{}, 64); - tests(Q, 0, 99, std::plus<>{}, MaxWGSize * 2); - tests(Q, 0, 99, std::plus<>{}, MaxWGSize * 2 + 5); + tests(Q, 0, 99, std::plus{}, 1); + tests(Q, 0, 99, std::plus<>{}, 7); + tests(Q, 0, 99, std::plus<>{}, 64); + tests(Q, 0, 99, std::plus<>{}, MaxWGSize * 2); + tests(Q, 0, 99, std::plus<>{}, MaxWGSize * 2 + 5); // Try various types & ranges. tests(Q, ~0, 99, std::bit_and<>{}, 7); tests(Q, 0, 0xff99, std::bit_xor<>{}, MaxWGSize); tests(Q, 0, 0xff99, std::bit_or<>{}, 3); - tests(Q, 1, 3, std::multiplies<>{}, 32); - tests(Q, 1, 3, std::multiplies<>{}, MaxWGSize * 4); - tests(Q, (std::numeric_limits::max)(), -99, + tests(Q, 1, 2, std::multiplies<>{}, 7); + tests(Q, (std::numeric_limits::max)(), -99, ext::oneapi::minimum<>{}, MaxWGSize * 2); - tests(Q, (std::numeric_limits::min)(), 99, + tests(Q, (std::numeric_limits::min)(), 99, ext::oneapi::maximum<>{}, 8); - tests(Q, 1, 99, std::multiplies<>{}, MaxWGSize); + tests(Q, 1, 99, std::multiplies<>{}, 10); // Check with CUSTOM type. tests(Q, CustomVec(0), CustomVec(99), @@ -52,6 +52,6 @@ int main() { tests(Q, CustomVec(0), CustomVec(99), CustomVecPlus{}, MaxWGSize * 3); - std::cout << "Test passed\n"; - return 0; + printFinalStatus(NumErrors); + return NumErrors; } diff --git a/SYCL/Reduction/reduction_range_1d_s1_rw.cpp b/SYCL/Reduction/reduction_range_1d_s1_rw.cpp index 01d55587b7..bc10d647ce 100644 --- a/SYCL/Reduction/reduction_range_1d_s1_rw.cpp +++ b/SYCL/Reduction/reduction_range_1d_s1_rw.cpp @@ -18,10 +18,12 @@ using namespace cl::sycl; +int NumErrors = 0; + template void tests(queue &Q, T Identity, T Init, BinaryOperation BOp, size_t NWItems) { constexpr access::mode RW = access::mode::read_write; - testBoth(Q, Identity, Init, BOp, range<1>{NWItems}); + NumErrors += testBoth(Q, Identity, Init, BOp, range<1>{NWItems}); } int main() { @@ -32,23 +34,21 @@ int main() { // Fast-reduce and Fast-atomics. Try various range types/sizes. tests(Q, 0, 99, std::plus{}, 1); - tests(Q, 0, 99, std::plus{}, 2); - tests(Q, 0, 99, std::plus<>{}, 7); - tests(Q, 0, 99, std::plus<>{}, 64); - tests(Q, 0, 99, std::plus<>{}, MaxWGSize * 2); - tests(Q, 0, 99, std::plus<>{}, MaxWGSize * 2 + 5); + tests(Q, 0, 99, std::plus<>{}, 7); + tests(Q, 0, 99, std::plus<>{}, 64); + tests(Q, 0, 99, std::plus<>{}, MaxWGSize * 2); + tests(Q, 0, 99, std::plus<>{}, MaxWGSize * 2 + 5); // Try various types & ranges. tests(Q, ~0, ~0, std::bit_and<>{}, 8); tests(Q, 0, 0x12340000, std::bit_xor<>{}, 16); tests(Q, 0, 0x3400, std::bit_or<>{}, MaxWGSize * 4); - tests(Q, 1, 2, std::multiplies<>{}, 256); - tests(Q, 1, 3, std::multiplies<>{}, MaxWGSize * 4); + tests(Q, 1, 2, std::multiplies<>{}, 16); + tests(Q, 1, 3, std::multiplies<>{}, 11); tests(Q, (std::numeric_limits::max)(), -99, ext::oneapi::minimum<>{}, MaxWGSize * 2); tests(Q, (std::numeric_limits::min)(), 99, ext::oneapi::maximum<>{}, 8); - tests(Q, 1, 99, std::multiplies<>{}, MaxWGSize); // Check with CUSTOM type. tests(Q, CustomVec(0), CustomVec(99), @@ -56,6 +56,6 @@ int main() { tests(Q, CustomVec(0), CustomVec(99), CustomVecPlus{}, MaxWGSize * 3); - std::cout << "Test passed\n"; - return 0; + printFinalStatus(NumErrors); + return NumErrors; } diff --git a/SYCL/Reduction/reduction_range_2d_s1_dw.cpp b/SYCL/Reduction/reduction_range_2d_s1_dw.cpp index 0846b0ac0e..79476a1f52 100644 --- a/SYCL/Reduction/reduction_range_2d_s1_dw.cpp +++ b/SYCL/Reduction/reduction_range_2d_s1_dw.cpp @@ -14,10 +14,12 @@ using namespace cl::sycl; +int NumErrors = 0; + template void tests(queue &Q, T Identity, T Init, BinaryOperation BOp, range<2> Range) { constexpr access::mode DW = access::mode::discard_write; - testBoth(Q, Identity, Init, BOp, Range); + NumErrors += testBoth(Q, Identity, Init, BOp, Range); } int main() { @@ -38,9 +40,8 @@ int main() { tests(Q, 0, 0x2021ff99, std::bit_xor<>{}, range<2>{3, 3}); tests(Q, ~0, 99, std::bit_and<>{}, range<2>{4, 3}); tests(Q, 0, 99, std::bit_or<>{}, range<2>{2, 2}); - tests(Q, 1, 3, std::multiplies<>{}, range<2>{16, 3}); - tests(Q, 1, 3, std::multiplies<>{}, - range<2>{3, MaxWGSize}); + tests(Q, 1, 3, std::multiplies<>{}, range<2>{8, 3}); + tests(Q, 1, 3, std::multiplies<>{}, range<2>{3, 7}); tests(Q, (std::numeric_limits::max)(), -99, ext::oneapi::minimum<>{}, range<2>{8, 3}); tests(Q, (std::numeric_limits::min)(), 99, @@ -50,6 +51,6 @@ int main() { tests(Q, CustomVec(0), CustomVec(99), CustomVecPlus{}, range<2>{33, MaxWGSize}); - std::cout << "Test passed\n"; - return 0; + printFinalStatus(NumErrors); + return NumErrors; } diff --git a/SYCL/Reduction/reduction_range_2d_s1_rw.cpp b/SYCL/Reduction/reduction_range_2d_s1_rw.cpp index 56a5a9e7af..4dea17a2b7 100644 --- a/SYCL/Reduction/reduction_range_2d_s1_rw.cpp +++ b/SYCL/Reduction/reduction_range_2d_s1_rw.cpp @@ -14,10 +14,12 @@ using namespace cl::sycl; +int NumErrors = 0; + template void tests(queue &Q, T Identity, T Init, BinaryOperation BOp, range<2> Range) { constexpr access::mode RW = access::mode::read_write; - testBoth(Q, Identity, Init, BOp, Range); + NumErrors += testBoth(Q, Identity, Init, BOp, Range); } int main() { @@ -38,9 +40,8 @@ int main() { tests(Q, 0, 0x2021ff99, std::bit_xor<>{}, range<2>{3, 3}); tests(Q, ~0, 99, std::bit_and<>{}, range<2>{4, 3}); tests(Q, 0, 99, std::bit_or<>{}, range<2>{2, 2}); - tests(Q, 1, 3, std::multiplies<>{}, range<2>{16, 3}); - tests(Q, 1, 3, std::multiplies<>{}, - range<2>{3, MaxWGSize}); + tests(Q, 1, 3, std::multiplies<>{}, range<2>{8, 3}); + tests(Q, 1, 3, std::multiplies<>{}, range<2>{3, 8}); tests(Q, (std::numeric_limits::max)(), -99, ext::oneapi::minimum<>{}, range<2>{8, 3}); tests(Q, (std::numeric_limits::min)(), 99, @@ -50,6 +51,6 @@ int main() { tests(Q, CustomVec(0), CustomVec(99), CustomVecPlus{}, range<2>{33, MaxWGSize}); - std::cout << "Test passed\n"; - return 0; + printFinalStatus(NumErrors); + return NumErrors; } diff --git a/SYCL/Reduction/reduction_range_3d_s1_dw.cpp b/SYCL/Reduction/reduction_range_3d_s1_dw.cpp index d0b11a98f5..26fb3b1c50 100644 --- a/SYCL/Reduction/reduction_range_3d_s1_dw.cpp +++ b/SYCL/Reduction/reduction_range_3d_s1_dw.cpp @@ -14,10 +14,12 @@ using namespace cl::sycl; +int NumErrors = 0; + template void tests(queue &Q, T Identity, T Init, BinaryOperation BOp, range<3> Range) { constexpr access::mode DW = access::mode::discard_write; - testBoth(Q, Identity, Init, BOp, Range); + NumErrors += testBoth(Q, Identity, Init, BOp, Range); } int main() { @@ -50,19 +52,17 @@ int main() { range<3>{MaxWGSize * 3, 4, 3}); tests(Q, 0, 99, std::bit_or<>{}, range<3>{2, 2, MaxWGSize * 3}); - tests(Q, 1, 3, std::multiplies<>{}, range<3>{16, 3, 5}); - tests(Q, 1, 3, std::multiplies<>{}, - range<3>{2, 3, MaxWGSize}); + tests(Q, 1, 3, std::multiplies<>{}, range<3>{2, 3, 5}); + tests(Q, 1, 2, std::multiplies<>{}, range<3>{2, 3, 8}); tests(Q, (std::numeric_limits::max)(), -99, ext::oneapi::minimum<>{}, range<3>{MaxWGSize, 8, 3}); tests(Q, (std::numeric_limits::min)(), 99, ext::oneapi::maximum<>{}, range<3>{3, MaxWGSize, 3}); - tests(Q, 1, 99, std::multiplies<>{}, - range<3>{3, 3, MaxWGSize}); + tests(Q, 1, 99, std::multiplies<>{}, range<3>{3, 3, 5}); tests(Q, CustomVec(0), CustomVec(99), CustomVecPlus{}, range<3>{2, 33, MaxWGSize}); - std::cout << "Test passed\n"; - return 0; + printFinalStatus(NumErrors); + return NumErrors; } diff --git a/SYCL/Reduction/reduction_range_3d_s1_rw.cpp b/SYCL/Reduction/reduction_range_3d_s1_rw.cpp index 963e9b4769..b5b8ddc0d3 100644 --- a/SYCL/Reduction/reduction_range_3d_s1_rw.cpp +++ b/SYCL/Reduction/reduction_range_3d_s1_rw.cpp @@ -14,10 +14,12 @@ using namespace cl::sycl; +int NumErrors = 0; + template void tests(queue &Q, T Identity, T Init, BinaryOperation BOp, range<3> Range) { constexpr access::mode RW = access::mode::read_write; - testBoth(Q, Identity, Init, BOp, Range); + NumErrors += testBoth(Q, Identity, Init, BOp, Range); } int main() { @@ -50,19 +52,17 @@ int main() { range<3>{MaxWGSize * 3, 4, 3}); tests(Q, 0, 99, std::bit_or<>{}, range<3>{2, 2, MaxWGSize * 3}); - tests(Q, 1, 3, std::multiplies<>{}, range<3>{16, 3, 5}); - tests(Q, 1, 3, std::multiplies<>{}, - range<3>{2, 3, MaxWGSize}); + tests(Q, 1, 3, std::multiplies<>{}, range<3>{2, 3, 5}); + tests(Q, 1, 3, std::multiplies<>{}, range<3>{5, 2, 3}); tests(Q, (std::numeric_limits::max)(), -99, ext::oneapi::minimum<>{}, range<3>{MaxWGSize, 8, 3}); tests(Q, (std::numeric_limits::min)(), 99, ext::oneapi::maximum<>{}, range<3>{3, MaxWGSize, 3}); - tests(Q, 1, 99, std::multiplies<>{}, - range<3>{3, 3, MaxWGSize}); + tests(Q, 1, 99, std::multiplies<>{}, range<3>{3, 3, 4}); tests(Q, CustomVec(0), CustomVec(99), CustomVecPlus{}, range<3>{2, 33, MaxWGSize}); - std::cout << "Test passed\n"; - return 0; + printFinalStatus(NumErrors); + return NumErrors; } diff --git a/SYCL/Reduction/reduction_range_lambda.cpp b/SYCL/Reduction/reduction_range_lambda.cpp index ef7a1d46be..b677055609 100644 --- a/SYCL/Reduction/reduction_range_lambda.cpp +++ b/SYCL/Reduction/reduction_range_lambda.cpp @@ -25,26 +25,34 @@ int main() { auto LambdaSum = [](auto x, auto y) { return (x + y); }; - testBoth(Q, 0, 99, LambdaSum, range<1>{7}); - testBoth(Q, 0, 99, LambdaSum, range<1>{7}); + int NumErrors = 0; - testBoth(Q, 0, 99, LambdaSum, range<1>{MaxWGSize + 1}); - testBoth(Q, 0, 99, LambdaSum, range<1>{MaxWGSize + 1}); + NumErrors += testBoth(Q, 0, 99, LambdaSum, range<1>{7}); + NumErrors += testBoth(Q, 0, 99, LambdaSum, range<1>{7}); - testBoth(Q, 0, 99, LambdaSum, range<2>{3, 4}); - testBoth(Q, 0, 99, LambdaSum, range<2>{3, 4}); + NumErrors += + testBoth(Q, 0, 99, LambdaSum, range<1>{MaxWGSize + 1}); + NumErrors += + testBoth(Q, 0, 99, LambdaSum, range<1>{MaxWGSize + 1}); - testBoth(Q, 0, 99, LambdaSum, range<2>{3, MaxWGSize + 1}); - testBoth(Q, 0, 99, LambdaSum, range<2>{3, MaxWGSize + 1}); + NumErrors += testBoth(Q, 0, 99, LambdaSum, range<2>{3, 4}); + NumErrors += testBoth(Q, 0, 99, LambdaSum, range<2>{3, 4}); - testBoth(Q, 0, 99, LambdaSum, range<3>{2, 3, 4}); - testBoth(Q, 0, 99, LambdaSum, range<3>{2, 3, 4}); + NumErrors += testBoth(Q, 0, 99, LambdaSum, + range<2>{3, MaxWGSize + 1}); + NumErrors += testBoth(Q, 0, 99, LambdaSum, + range<2>{3, MaxWGSize + 1}); - testBoth(Q, 0, 99, LambdaSum, - range<3>{2, 3, MaxWGSize + 1}); - testBoth(Q, 0, 99, LambdaSum, - range<3>{2, 3, MaxWGSize + 1}); + NumErrors += + testBoth(Q, 0, 99, LambdaSum, range<3>{2, 3, 4}); + NumErrors += + testBoth(Q, 0, 99, LambdaSum, range<3>{2, 3, 4}); - std::cout << "Test passed\n"; - return 0; + NumErrors += testBoth(Q, 0, 99, LambdaSum, + range<3>{2, 3, MaxWGSize + 1}); + NumErrors += testBoth(Q, 0, 99, LambdaSum, + range<3>{2, 3, MaxWGSize + 1}); + + printFinalStatus(NumErrors); + return NumErrors; } diff --git a/SYCL/Reduction/reduction_range_scalar.hpp b/SYCL/Reduction/reduction_range_scalar.hpp index 7ee833f9d1..ef03f2ace1 100644 --- a/SYCL/Reduction/reduction_range_scalar.hpp +++ b/SYCL/Reduction/reduction_range_scalar.hpp @@ -3,38 +3,21 @@ // accessing a scalar holding the reduction result. #include "reduction_utils.hpp" -#include -#include using namespace cl::sycl; template class KName; -template -std::ostream &operator<<(std::ostream &OS, const range &Range) { - OS << "{" << Range[0]; - if constexpr (Dims > 1) - OS << ", " << Range[1]; - if constexpr (Dims > 2) - OS << ", " << Range[2]; - OS << "}"; - return OS; -} - -template -void test(queue &Q, T Identity, T Init, BinaryOperation BOp, - range Range) { - std::string StdMode = IsSYCL2020Mode ? "SYCL2020" : "ONEAPI "; - std::cout << "Running the test case: " << StdMode - << " {T=" << typeid(T).name() - << ", BOp=" << typeid(BinaryOperation).name() << ", Range=" << Range - << std::endl; +template +int test(queue &Q, T Identity, T Init, BinaryOperation BOp, + const range &Range) { + printTestLabel(IsSYCL2020, Range); // Skip the test for such big arrays now. constexpr size_t TwoGB = 2LL * 1024 * 1024 * 1024; if (Range.size() > TwoGB) - return; + return 0; buffer InBuf(Range); buffer OutBuf(1); @@ -51,42 +34,24 @@ void test(queue &Q, T Identity, T Init, BinaryOperation BOp, (OutBuf.template get_access())[0] = Init; // Compute. - if constexpr (IsSYCL2020Mode) { - Q.submit([&](handler &CGH) { - auto In = InBuf.template get_access(CGH); - property_list PropList = getPropertyList(); - auto Redu = sycl::reduction(OutBuf, CGH, Identity, BOp, PropList); - - CGH.parallel_for( - Range, Redu, [=](id Id, auto &Sum) { Sum.combine(In[Id]); }); - }); - } else { - Q.submit([&](handler &CGH) { - auto In = InBuf.template get_access(CGH); - accessor Out(OutBuf, CGH); - auto Redu = ext::oneapi::reduction(Out, Identity, BOp); - - CGH.parallel_for( - Range, Redu, [=](id Id, auto &Sum) { Sum.combine(In[Id]); }); - }); - } + Q.submit([&](handler &CGH) { + auto In = InBuf.template get_access(CGH); + auto Redu = + createReduction(OutBuf, CGH, Identity, BOp); + CGH.parallel_for( + Range, Redu, [=](id Id, auto &Sum) { Sum.combine(In[Id]); }); + }); // Check correctness. auto Out = OutBuf.template get_access(); T ComputedOut = *(Out.get_pointer()); - if (ComputedOut != CorrectOut) { - printDeviceInfo(Q, true); - std::cerr << "Error: Range = " << Range << ", " - << "Computed value: " << ComputedOut - << ", Expected value: " << CorrectOut << "\n"; - assert(0 && "Wrong value."); - } + return checkResults(Q, IsSYCL2020, BOp, Range, ComputedOut, CorrectOut); } template -void testBoth(queue &Q, T Identity, T Init, BinaryOperation BOp, - range Range) { - test, false, Mode>(Q, Identity, Init, BOp, Range); - test, true, Mode>(Q, Identity, Init, BOp, Range); +int testBoth(queue &Q, T Identity, T Init, BinaryOperation BOp, + const range &Range) { + return test, false, Mode>(Q, Identity, Init, BOp, Range) + + test, true, Mode>(Q, Identity, Init, BOp, Range); } diff --git a/SYCL/Reduction/reduction_reducer_op_eq.cpp b/SYCL/Reduction/reduction_reducer_op_eq.cpp index 07c4bbce28..e6990c875e 100644 --- a/SYCL/Reduction/reduction_reducer_op_eq.cpp +++ b/SYCL/Reduction/reduction_reducer_op_eq.cpp @@ -6,9 +6,7 @@ // This test checks that operators ++, +=, *=, |=, &=, ^= are supported // whent the corresponding std::plus<>, std::multiplies, etc are defined. -#include -#include -#include +#include "reduction_utils.hpp" using namespace sycl; @@ -78,19 +76,13 @@ template <> struct bit_and { }; } // namespace std -template -auto createReduction(T *USMPtr, T Identity, BinaryOperation BOp) { - if constexpr (IsSYCL2020Mode) - return sycl::reduction(USMPtr, Identity, BOp); - else - return ONEAPI::reduction(USMPtr, Identity, BOp); -} - template int test(queue &Q, T Identity) { constexpr size_t N = 16; constexpr size_t L = 4; + nd_range<1> NDR{N, L}; + printTestLabel(IsSYCL2020Mode, NDR); T *Data = malloc_host(N, Q); T *Res = malloc_host(1, Q); @@ -107,7 +99,6 @@ int test(queue &Q, T Identity) { *Res = Identity; auto Red = createReduction(Res, Identity, BOp); - nd_range<1> NDR{N, L}; if constexpr (OpEq == PlusPlus) { auto Lambda = [=](nd_item<1> ID, auto &Sum) { ++Sum; }; Q.submit([&](handler &H) { H.parallel_for(NDR, Red, Lambda); }).wait(); @@ -189,11 +180,12 @@ template int testINTPack(queue &Q) { int main() { queue Q; - int Error = 0; - Error += testFPPack(Q); - Error += testINTPack(Q); - Error += testINTPack(Q); - - std::cout << (Error ? "Failed\n" : "Passed.\n"); - return Error; + printDeviceInfo(Q); + int NumErrors = 0; + NumErrors += testFPPack(Q); + NumErrors += testINTPack(Q); + NumErrors += testINTPack(Q); + + printFinalStatus(NumErrors); + return NumErrors; } diff --git a/SYCL/Reduction/reduction_usm.cpp b/SYCL/Reduction/reduction_usm.cpp index 941c113881..e3c22b17d2 100644 --- a/SYCL/Reduction/reduction_usm.cpp +++ b/SYCL/Reduction/reduction_usm.cpp @@ -4,46 +4,33 @@ // RUN: %ACC_RUN_PLACEHOLDER %t.out // This test performs basic checks of parallel_for(nd_range, reduction, func) -// with reductions initialized with USM var. It tests both ONEAPI::reduction -// and SYCL-2020 reduction (sycl::reduction) assuming only read-write access, -// i.e. without using SYCL-2020 property::reduction::initialize_to_identity. +// with reductions initialized with USM var. It tests both +// ext::oneapi::reduction and SYCL-2020 reduction (sycl::reduction) assuming +// only read-write access, i.e. without using SYCL-2020 +// property::reduction::initialize_to_identity. #include "reduction_utils.hpp" -#include -#include using namespace cl::sycl; -template class KernelNameGroup; - -template -auto createReduction(T *USMPtr, T Identity, BinaryOperation BOp) { - if constexpr (IsSYCL2020) - return sycl::reduction(USMPtr, Identity, BOp); - else - return ONEAPI::reduction(USMPtr, Identity, BOp); -} +template class KName; template -void test(queue &Q, T Identity, T Init, size_t WGSize, size_t NWItems, - usm::alloc AllocType) { +int test(queue &Q, T Identity, T Init, size_t WGSize, size_t NWItems, + usm::alloc AllocType) { + nd_range<1> NDRange(range<1>{NWItems}, range<1>{WGSize}); + printTestLabel(IsSYCL2020, NDRange); + auto Dev = Q.get_device(); - if (AllocType == usm::alloc::shared && - !Dev.get_info()) - return; - if (AllocType == usm::alloc::host && - !Dev.get_info()) - return; - if (AllocType == usm::alloc::device && - !Dev.get_info()) - return; + if (!Dev.has(getUSMAspect(AllocType))) + return 0; T *ReduVarPtr = (T *)malloc(sizeof(T), Dev, Q.get_context(), AllocType); if (ReduVarPtr == nullptr) - return; + return 0; if (AllocType == usm::alloc::device) { Q.submit([&](handler &CGH) { - CGH.single_task>( + CGH.single_task>( [=]() { *ReduVarPtr = Init; }); }).wait(); } else { @@ -62,9 +49,7 @@ void test(queue &Q, T Identity, T Init, size_t WGSize, size_t NWItems, Q.submit([&](handler &CGH) { auto In = InBuf.template get_access(CGH); auto Redu = createReduction(ReduVarPtr, Identity, BOp); - nd_range<1> NDRange(range<1>{NWItems}, range<1>{WGSize}); - - CGH.parallel_for>( + CGH.parallel_for>( NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) { Sum.combine(In[NDIt.get_global_linear_id()]); }); @@ -76,63 +61,65 @@ void test(queue &Q, T Identity, T Init, size_t WGSize, size_t NWItems, buffer Buf(&ComputedOut, range<1>(1)); Q.submit([&](handler &CGH) { auto OutAcc = Buf.template get_access(CGH); - CGH.single_task>( + CGH.single_task>( [=]() { OutAcc[0] = *ReduVarPtr; }); }).wait(); ComputedOut = (Buf.template get_access())[0]; } else { ComputedOut = *ReduVarPtr; } - if (ComputedOut != CorrectOut) { - std::cerr << "NWItems = " << NWItems << ", WGSize = " << WGSize << "\n"; - std::cerr << "Computed value: " << ComputedOut - << ", Expected value: " << CorrectOut - << ", AllocMode: " << static_cast(AllocType) << "\n"; - assert(0 && "Wrong value."); - } + std::string AllocStr = + "AllocMode=" + std::to_string(static_cast(AllocType)); + int Error = checkResults(Q, IsSYCL2020, BOp, NDRange, ComputedOut, CorrectOut, + AllocStr); free(ReduVarPtr, Q.get_context()); + return Error; } +int NumErrors = 0; + template void testUSM(queue &Q, T Identity, T Init, size_t WGSize, size_t NWItems) { // Test SYCL-2020 reductions - test, true, T, BinaryOperation>( + NumErrors += test, true, T, BinaryOperation>( Q, Identity, Init, WGSize, NWItems, usm::alloc::shared); - test, true, T, BinaryOperation>( + NumErrors += test, true, T, BinaryOperation>( Q, Identity, Init, WGSize, NWItems, usm::alloc::host); - test, true, T, BinaryOperation>( + NumErrors += test, true, T, BinaryOperation>( Q, Identity, Init, WGSize, NWItems, usm::alloc::device); - // Test ONEAPI reductions - test, false, T, BinaryOperation>( + // Test ext::oneapi reductions + NumErrors += test, false, T, BinaryOperation>( Q, Identity, Init, WGSize, NWItems, usm::alloc::shared); - test, false, T, BinaryOperation>( + NumErrors += test, false, T, BinaryOperation>( Q, Identity, Init, WGSize, NWItems, usm::alloc::host); - test, false, T, BinaryOperation>( + NumErrors += test, false, T, BinaryOperation>( Q, Identity, Init, WGSize, NWItems, usm::alloc::device); } int main() { queue Q; + printDeviceInfo(Q); + // fast atomics and fast reduce - testUSM>(Q, 0, 99, 49, 5 * 49); + testUSM>(Q, 0, 99, 49, 5 * 49); // fast atomics - testUSM>(Q, 0, 0xff00ff00, 7, 7); - testUSM>(Q, 0, 0x7f007f00, 4, 32); + testUSM>(Q, 0, 0xff00ff00, 7, 7); + testUSM>(Q, 0, 0x7f007f00, 4, 32); // fast reduce - testUSM>( + testUSM>( Q, getMaximumFPValue(), -100.0, 17, 17); - testUSM>( + testUSM>( Q, getMinimumFPValue(), 100.0, 4, 32); // generic algorithm - testUSM>(Q, 1, 5, 7, 7); + testUSM>(Q, 1, 5, 7, 7); testUSM, CustomVecPlus>( Q, CustomVec(0), CustomVec(77), 8, 8 * 3); - std::cout << "Test passed\n"; - return 0; + printFinalStatus(NumErrors); + return NumErrors; } diff --git a/SYCL/Reduction/reduction_usm_dw.cpp b/SYCL/Reduction/reduction_usm_dw.cpp index e3c9b0d6a1..42f4f41a59 100644 --- a/SYCL/Reduction/reduction_usm_dw.cpp +++ b/SYCL/Reduction/reduction_usm_dw.cpp @@ -12,33 +12,30 @@ // is created with property::reduction::initialize_to_identity. #include "reduction_utils.hpp" -#include -#include using namespace cl::sycl; -template class KernelNameGroup; +template class KName; + +// Discard-write access to USM reductions is available only in SYCL2020. +constexpr bool IsSYCL2020 = true; template -void test(queue &Q, T Identity, T Init, size_t WGSize, size_t NWItems, - usm::alloc AllocType) { +int test(queue &Q, T Identity, T Init, size_t WGSize, size_t NWItems, + usm::alloc AllocType) { + nd_range<1> NDRange(range<1>{NWItems}, range<1>{WGSize}); + printTestLabel(IsSYCL2020, NDRange); + auto Dev = Q.get_device(); - if (AllocType == usm::alloc::shared && - !Dev.get_info()) - return; - if (AllocType == usm::alloc::host && - !Dev.get_info()) - return; - if (AllocType == usm::alloc::device && - !Dev.get_info()) - return; + if (!Dev.has(getUSMAspect(AllocType))) + return 0; T *ReduVarPtr = (T *)malloc(sizeof(T), Dev, Q.get_context(), AllocType); if (ReduVarPtr == nullptr) - return; + return 0; if (AllocType == usm::alloc::device) { Q.submit([&](handler &CGH) { - CGH.single_task>( + CGH.single_task>( [=]() { *ReduVarPtr = Init; }); }).wait(); } else { @@ -55,11 +52,9 @@ void test(queue &Q, T Identity, T Init, size_t WGSize, size_t NWItems, // Compute. Q.submit([&](handler &CGH) { auto In = InBuf.template get_access(CGH); - property_list PropList(property::reduction::initialize_to_identity{}); - auto Redu = sycl::reduction(ReduVarPtr, Identity, BOp, PropList); - nd_range<1> NDRange(range<1>{NWItems}, range<1>{WGSize}); - - CGH.parallel_for>( + auto Redu = createReduction( + ReduVarPtr, Identity, BOp); + CGH.parallel_for>( NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) { Sum.combine(In[NDIt.get_global_linear_id()]); }); @@ -71,54 +66,56 @@ void test(queue &Q, T Identity, T Init, size_t WGSize, size_t NWItems, buffer Buf(&ComputedOut, range<1>(1)); Q.submit([&](handler &CGH) { auto OutAcc = Buf.template get_access(CGH); - CGH.single_task>( + CGH.single_task>( [=]() { OutAcc[0] = *ReduVarPtr; }); }).wait(); ComputedOut = (Buf.template get_access())[0]; } else { ComputedOut = *ReduVarPtr; } - if (ComputedOut != CorrectOut) { - std::cerr << "NWItems = " << NWItems << ", WGSize = " << WGSize << "\n"; - std::cerr << "Computed value: " << ComputedOut - << ", Expected value: " << CorrectOut - << ", AllocMode: " << static_cast(AllocType) << "\n"; - assert(0 && "Wrong value."); - } + std::string AllocStr = + "AllocMode=" + std::to_string(static_cast(AllocType)); + int Error = checkResults(Q, IsSYCL2020, BOp, NDRange, ComputedOut, CorrectOut, + AllocStr); free(ReduVarPtr, Q.get_context()); + return Error; } +int NumErrors = 0; + template void testUSM(queue &Q, T Identity, T Init, size_t WGSize, size_t NWItems) { - test, T, BinaryOperation>( + NumErrors += test, T, BinaryOperation>( Q, Identity, Init, WGSize, NWItems, usm::alloc::shared); - test, T, BinaryOperation>( + NumErrors += test, T, BinaryOperation>( Q, Identity, Init, WGSize, NWItems, usm::alloc::host); - test, T, BinaryOperation>( + NumErrors += test, T, BinaryOperation>( Q, Identity, Init, WGSize, NWItems, usm::alloc::device); } int main() { queue Q; + printDeviceInfo(Q); + // fast atomics and fast reduce - testUSM>(Q, 0, 99, 49, 5 * 49); + testUSM>(Q, 0, 99, 49, 5 * 49); // fast atomics - testUSM>(Q, 0, 0xff00ff00, 7, 7); - testUSM>(Q, 0, 0x7f007f00, 4, 32); + testUSM>(Q, 0, 0xff00ff00, 7, 7); + testUSM>(Q, 0, 0x7f007f00, 4, 32); // fast reduce - testUSM>( + testUSM>( Q, getMaximumFPValue(), -100.0, 17, 17); - testUSM>( + testUSM>( Q, getMinimumFPValue(), 100.0, 4, 32); // generic algorithm - testUSM>(Q, 1, 5, 7, 7); + testUSM>(Q, 1, 5, 7, 7); testUSM, CustomVecPlus>( Q, CustomVec(0), CustomVec(77), 8, 8 * 3); - std::cout << "Test passed\n"; - return 0; + printFinalStatus(NumErrors); + return NumErrors; } diff --git a/SYCL/Reduction/reduction_utils.hpp b/SYCL/Reduction/reduction_utils.hpp index e51030f1ea..6303fe1d46 100644 --- a/SYCL/Reduction/reduction_utils.hpp +++ b/SYCL/Reduction/reduction_utils.hpp @@ -11,8 +11,9 @@ void initInputData(buffer &InBuf, T &ExpectedOut, T Identity, size_t N = Range.size(); auto In = InBuf.template get_access(); for (int I = 0; I < N; ++I) { - if (std::is_same>::value) - In[I] = 1 + (((I % 37) == 0) ? 1 : 0); + if (std::is_same_v> || + std::is_same_v>) + In[I] = 1.1 + (((I % 11) == 0) ? 1 : 0); else In[I] = ((I + 1) % 5) + 1.1; ExpectedOut = BOp(ExpectedOut, In[I]); @@ -28,8 +29,9 @@ void initInputData(buffer &InBuf, T &ExpectedOut, T Identity, auto In = InBuf.template get_access(); for (int J = 0; J < Range[0]; ++J) { for (int I = 0; I < Range[1]; ++I) { - if (std::is_same>::value) - In[J][I] = 1 + ((((I * 2 + J * 3) % 37) == 0) ? 1 : 0); + if (std::is_same_v> || + std::is_same_v>) + In[J][I] = 1.1 + ((((I + J * 3) % 11) == 0) ? 1 : 0); else In[J][I] = ((I + 1 + J) % 5) + 1.1; ExpectedOut = BOp(ExpectedOut, In[J][I]); @@ -47,8 +49,9 @@ void initInputData(buffer &InBuf, T &ExpectedOut, T Identity, for (int K = 0; K < Range[0]; ++K) { for (int J = 0; J < Range[1]; ++J) { for (int I = 0; I < Range[2]; ++I) { - if (std::is_same>::value) - In[K][J][I] = 1 + ((((I * 2 + J * 3 + K) % 37) == 0) ? 1 : 0); + if (std::is_same_v> || + std::is_same_v>) + In[K][J][I] = 1.1 + ((((I + J * 3 + K) % 11) == 0) ? 1 : 0); else In[K][J][I] = ((I + 1 + J + K * 3) % 5) + 1.1; ExpectedOut = BOp(ExpectedOut, In[K][J][I]); @@ -75,6 +78,23 @@ bool operator==(const CustomVec &A, const CustomVec &B) { return A.X == B.X && A.Y == B.Y; } template +bool operator<(const CustomVec &A, const CustomVec &B) { + return A.X < B.X && A.Y < B.Y; +} +template +CustomVec operator/(const CustomVec &A, const CustomVec &B) { + return {A.X / B.X && A.Y / B.Y}; +} +template +CustomVec operator-(const CustomVec &A, const CustomVec &B) { + return {A.X - B.X && A.Y - B.Y}; +} +namespace std { +template CustomVec abs(const CustomVec &A) { + return {std::abs(A.X), std::abs(A.Y)}; +} +} // namespace std +template std::ostream &operator<<(std::ostream &OS, const CustomVec &V) { return OS << "(" << V.X << ", " << V.Y << ")"; } @@ -105,14 +125,142 @@ template property_list getPropertyList() { } void printDeviceInfo(queue &Q, bool ToCERR = false) { + static int IsErrDeviceInfoPrinted = 0; + if (IsErrDeviceInfoPrinted >= 2) + return; + IsErrDeviceInfoPrinted++; + device D = Q.get_device(); auto Name = D.get_info(); size_t MaxWGSize = D.get_info(); size_t LocalMemSize = D.get_info(); - if (ToCERR) - std::cout << "Device: " << Name << ", MaxWGSize: " << MaxWGSize - << ", LocalMemSize: " << LocalMemSize << std::endl; + std::ostream &OS = ToCERR ? std::cerr : std::cout; + OS << "Device: " << Name << ", MaxWGSize: " << MaxWGSize + << ", LocalMemSize: " << LocalMemSize + << ", Driver: " << D.get_info() << std::endl; +} + +template +std::ostream &operator<<(std::ostream &OS, const range &Range) { + OS << "{" << Range[0]; + if constexpr (Dims > 1) + OS << ", " << Range[1]; + if constexpr (Dims > 2) + OS << ", " << Range[2]; + OS << "}"; + return OS; +} + +template +std::ostream &operator<<(std::ostream &OS, const nd_range &Range) { + OS << "{" << Range.get_global_range() << ", " << Range.get_local_range() + << "}"; + return OS; +} + +template +void printTestLabel(bool IsSYCL2020, const RangeT &Range, bool ToCERR = false) { + std::ostream &OS = ToCERR ? std::cerr : std::cout; + std::string Mode = IsSYCL2020 ? "SYCL2020" : "ONEAPI "; + OS << (ToCERR ? "Error" : "Start") << ": Mode=" << Mode + << ", T=" << typeid(T).name() << ", BOp=" << typeid(BinaryOperation).name() + << ", Range=" << Range; +} + +template constexpr bool isPreciseResultFP() { + return (std::is_floating_point_v || std::is_same_v)&&( + std::is_same_v, BOp> || + std::is_same_v, BOp> || + std::is_same_v, BOp> || + std::is_same_v, BOp>); +} + +template +int checkResults(queue &Q, bool IsSYCL2020, BinaryOperation, + const RangeT &Range, const T &ComputedRes, const T &CorrectRes, + std::string AddInfo = "") { + std::string ErrorStr; + bool Passed; + + if constexpr (std::is_floating_point_v || std::is_same_v) { + // It is a pretty simple and naive FP diff check here, which though + // should work reasonably well for most of cases handled in reduction + // tests. + T MaxDiff = std::numeric_limits::epsilon() * std::fabs(CorrectRes); + if constexpr (std::is_same_v> || + std::is_same_v> || + std::is_same_v>) + MaxDiff *= Range.size(); + else + MaxDiff *= Range.get_global_range().size(); + + if (isPreciseResultFP()) + MaxDiff = 0; + T Diff = std::abs(CorrectRes - ComputedRes); + ErrorStr = ", Diff=" + std::to_string(Diff) + + ", MaxDiff=" + std::to_string(MaxDiff); + Passed = Diff <= MaxDiff; + } else { + Passed = ComputedRes == CorrectRes; + } + + std::cout << AddInfo << (Passed ? ". PASSED" : ". FAILED") << std::endl; + if (!Passed) { + printDeviceInfo(Q, true); + printTestLabel(IsSYCL2020, Range, true); + std::cerr << ", Computed value=" << ComputedRes + << ", Expected value=" << CorrectRes << ErrorStr + << (AddInfo.empty() ? "" : ", " + AddInfo) << std::endl; + } + return Passed ? 0 : 1; +} + +void printFinalStatus(int NumErrors) { + if (NumErrors == 0) + std::cout << "Test passed" << std::endl; else - std::cerr << "Device: " << Name << ", MaxWGSize: " << MaxWGSize - << ", LocalMemSize: " << LocalMemSize << std::endl; + std::cerr << NumErrors << " test-cases failed" << std::endl; +} + +template +auto createReduction(T *USMPtr, T Identity, BinaryOperation BOp) { + if constexpr (IsSYCL2020) + return sycl::reduction(USMPtr, Identity, BOp, getPropertyList()); + else + return ext::oneapi::reduction(USMPtr, Identity, BOp); +} + +template +auto createReduction(T *USMPtr, BinaryOperation BOp) { + if constexpr (IsSYCL2020) + return sycl::reduction(USMPtr, BOp, getPropertyList()); + else + return ext::oneapi::reduction(USMPtr, BOp); +} + +template +auto createReduction(BufferT ReduBuf, handler &CGH, T Identity, + BinaryOperation BOp) { + if constexpr (IsSYCL2020) { + property_list PropList = getPropertyList(); + return sycl::reduction(ReduBuf, CGH, Identity, BOp, PropList); + } else { + accessor Out(ReduBuf, + CGH); + return ext::oneapi::reduction(Out, Identity, BOp); + } +} + +aspect getUSMAspect(usm::alloc Alloc) { + if (Alloc == sycl::usm::alloc::host) + return aspect::usm_host_allocations; + + if (Alloc == sycl::usm::alloc::device) + return aspect::usm_device_allocations; + + assert(Alloc == usm::alloc::shared && "Unknown USM allocation type"); + return aspect::usm_shared_allocations; } From dfcbf0f968c59002bbe4c0c534de18b9903c04dc Mon Sep 17 00:00:00 2001 From: Vyacheslav N Klochkov Date: Mon, 9 Aug 2021 15:56:17 -0700 Subject: [PATCH 2/2] [SYCL] Disable the test reduction_nd_N_vars.cpp The test was supposed to be disabled by #389, but a wrong syntax was chosen there to do that. Fixed it here. Signed-off-by: Vyacheslav N Klochkov --- SYCL/Reduction/reduction_nd_N_vars.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SYCL/Reduction/reduction_nd_N_vars.cpp b/SYCL/Reduction/reduction_nd_N_vars.cpp index 11373a8e3b..0f4b4ead79 100644 --- a/SYCL/Reduction/reduction_nd_N_vars.cpp +++ b/SYCL/Reduction/reduction_nd_N_vars.cpp @@ -4,7 +4,7 @@ // RUN: %ACC_RUN_PLACEHOLDER %t.out // TODO: The test irregularly reports incorrect. -// UNSUPPORTED: TEMPORARY_DISABLED +// REQUIRES: TEMPORARY_DISABLED // This test checks handling of parallel_for() accepting nd_range and // two or more reductions.