Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

[SYCL] Fix warnings in reduction tests; make them more stable and verbose #394

Merged
merged 2 commits into from
Aug 10, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 22 additions & 32 deletions SYCL/Reduction/reduction_big_data.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,16 @@
// RUN: %CPU_RUN_PLACEHOLDER %t.out

// RUNx: %HOST_RUN_PLACEHOLDER %t.out
// TODO: Enable the test for HOST when it supports ONEAPI::reduce() and
// TODO: Enable the test for HOST when it supports ext::oneapi::reduce() and
// barrier()

// This test performs basic checks of parallel_for(nd_range, reduction, func)
// where the bigger data size and/or non-uniform work-group sizes may cause
// errors.

#include "reduction_utils.hpp"
#include <CL/sycl.hpp>

#include <algorithm>
#include <cassert>

using namespace cl::sycl;

Expand All @@ -27,14 +26,14 @@ size_t getSafeMaxWGSize(size_t MaxWGSize, size_t MemSize, size_t OneElemSize) {
return std::min(MaxNumElems / 2, MaxWGSize);
}

template <typename KernelName, typename T, int Dim, class BinaryOperation>
void test(queue &Q, T Identity) {
device Device = Q.get_device();
template <typename Name, typename T, int Dim, class BinaryOperation>
int test(queue &Q, T Identity) {
// It seems enough to test just one case - SYCL2020 reduction.
constexpr bool IsSYCL2020 = true;

device Device = Q.get_device();
std::size_t MaxWGSize = Device.get_info<info::device::max_work_group_size>();
std::size_t LocalMemSize = Device.get_info<info::device::local_mem_size>();
std::cout << "Detected device::max_work_group_size = " << MaxWGSize << "\n";
std::cout << "Detected device::local_mem_size = " << LocalMemSize << "\n";

size_t WGSize = getSafeMaxWGSize(MaxWGSize, LocalMemSize, sizeof(T));

Expand All @@ -44,15 +43,12 @@ void test(queue &Q, T Identity) {
size_t NWorkItems = std::min(WGSize * MaxWGSize + 1, MaxGlobalRange);

size_t NWorkGroups = (NWorkItems - 1) / WGSize + 1;
range<1> GlobalRange(NWorkGroups * WGSize);
range<1> LocalRange(WGSize);
nd_range<1> NDRange(GlobalRange, LocalRange);
std::cout << "Running the test with: GlobalRange = " << (NWorkGroups * WGSize)
<< ", LocalRange = " << WGSize << ", NWorkItems = " << NWorkItems
<< "\n";
nd_range<1> NDRange(range<1>{NWorkGroups * WGSize}, range<1>{WGSize});
printTestLabel<T, BinaryOperation>(IsSYCL2020, NDRange);

buffer<T, 1> InBuf(NWorkItems);
buffer<T, 1> OutBuf(1);
(OutBuf.template get_access<access::mode::write>())[0] = Identity;

// Initialize.
BinaryOperation BOp;
Expand All @@ -62,25 +58,17 @@ void test(queue &Q, T Identity) {
// Compute.
Q.submit([&](handler &CGH) {
auto In = InBuf.template get_access<access::mode::read>(CGH);
accessor<T, Dim, access::mode::discard_write, access::target::global_buffer>
Out(OutBuf, CGH);
CGH.parallel_for<KernelName>(NDRange, ONEAPI::reduction(Out, Identity, BOp),
[=](nd_item<1> NDIt, auto &Sum) {
if (NDIt.get_global_linear_id() < NWorkItems)
Sum.combine(
In[NDIt.get_global_linear_id()]);
});
CGH.parallel_for<Name>(NDRange, sycl::reduction(OutBuf, CGH, Identity, BOp),
[=](nd_item<1> NDIt, auto &Sum) {
if (NDIt.get_global_linear_id() < NWorkItems)
Sum.combine(In[NDIt.get_global_linear_id()]);
});
});

// Check correctness.
auto Out = OutBuf.template get_access<access::mode::read>();
T ComputedOut = *(Out.get_pointer());
if (ComputedOut != CorrectOut) {
std::cout << "Computed value: " << ComputedOut
<< ", Expected value: " << CorrectOut << "\n";
assert(0 && "Wrong value.");
}
std::cout << "Test case passed\n\n";
return checkResults(Q, IsSYCL2020, BOp, NDRange, ComputedOut, CorrectOut);
}

template <typename T> struct BigCustomVec : public CustomVec<T> {
Expand All @@ -99,11 +87,13 @@ template <class T> struct BigCustomVecPlus {

int main() {
queue Q;
test<class Test1, float, 0, ONEAPI::maximum<>>(Q, getMinimumFPValue<float>());
printDeviceInfo(Q);
int NumErrors = test<class A1, float, 0, ext::oneapi::maximum<>>(
Q, getMinimumFPValue<float>());

using BCV = BigCustomVec<long long>;
test<class Test2, BCV, 1, BigCustomVecPlus<long long>>(Q, BCV(0));
NumErrors += test<class A2, BCV, 1, BigCustomVecPlus<long long>>(Q, BCV(0));

std::cout << "Test passed\n";
return 0;
printFinalStatus(NumErrors);
return NumErrors;
}
155 changes: 55 additions & 100 deletions SYCL/Reduction/reduction_nd_N_vars.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,13 @@
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// TODO: The test irregularly reports incorrect.
// UNSUPPORTED: TEMPORARY_DISABLED
// REQUIRES: TEMPORARY_DISABLED

// This test checks handling of parallel_for() accepting nd_range and
// two or more reductions.

#include "reduction_utils.hpp"

#include <CL/sycl.hpp>

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <numeric>
#include <string>

using namespace cl::sycl;

template <typename... Ts> class KNameGroup;
Expand All @@ -29,22 +19,15 @@ template <typename T, bool B> class KName;
constexpr access::mode RW = access::mode::read_write;
constexpr access::mode DW = access::mode::discard_write;

template <typename T>
bool cherkResultIsExpected(int TestCaseNum, T Expected, T Computed,
bool IsSYCL2020) {
bool Success;
if (!std::is_floating_point<T>::value)
Success = (Expected == Computed);
else
Success = std::abs((Expected / Computed) - 1) < 0.5;

if (!Success) {
std::cerr << "Is SYCL2020 mode: " << IsSYCL2020 << std::endl;
std::cerr << TestCaseNum << ": Expected value = " << Expected
<< ", Computed value = " << Computed << "\n";
}

return Success;
template <typename RangeT>
void printNVarsTestLabel(bool IsSYCL2020, const RangeT &Range,
bool ToCERR = false) {
std::ostream &OS = ToCERR ? std::cerr : std::cout;
std::string Mode = IsSYCL2020 ? "SYCL2020" : "ONEAPI ";
OS << (ToCERR ? "Error" : "Start") << ": Mode=" << Mode
<< ", Range=" << Range;
if (!ToCERR)
OS << std::endl;
}

// Returns 0 if the test case passed. Otherwise, some non-zero value.
Expand All @@ -58,6 +41,10 @@ int testOne(queue &Q, T1 IdentityVal1, T1 InitVal1, BinaryOperation1 BOp1,
T3 IdentityVal3, T3 InitVal3, BinaryOperation3 BOp3,
T4 IdentityVal4, T3 InitVal4, BinaryOperation4 BOp4,
usm::alloc AllocType4, size_t NWorkItems, size_t WGSize) {

auto NDR = nd_range<1>{range<1>(NWorkItems), range<1>{WGSize}};
printNVarsTestLabel<>(IsSYCL2020, NDR);

buffer<T1, 1> InBuf1(NWorkItems);
buffer<T2, 1> InBuf2(NWorkItems);
buffer<T3, 1> InBuf3(NWorkItems);
Expand Down Expand Up @@ -120,64 +107,33 @@ int testOne(queue &Q, T1 IdentityVal1, T1 InitVal1, BinaryOperation1 BOp1,
}
}

auto NDR = nd_range<1>{range<1>(NWorkItems), range<1>{WGSize}};
if constexpr (IsSYCL2020) {
Q.submit([&](handler &CGH) {
auto In1 = InBuf1.template get_access<access::mode::read>(CGH);
auto In2 = InBuf2.template get_access<access::mode::read>(CGH);
auto In3 = InBuf3.template get_access<access::mode::read>(CGH);
auto In4 = InBuf4.template get_access<access::mode::read>(CGH);

auto Redu1 = sycl::reduction(OutBuf1, CGH, IdentityVal1, BOp1,
getPropertyList<Mode1>());
auto Redu2 = sycl::reduction(OutBuf2, CGH, IdentityVal2, BOp2,
getPropertyList<Mode2>());
auto Redu3 = sycl::reduction(OutBuf3, CGH, IdentityVal3, BOp3,
getPropertyList<Mode3>());
auto Redu4 =
sycl::reduction(Out4, IdentityVal4, BOp4, getPropertyList<Mode4>());

auto Lambda = [=](nd_item<1> NDIt, auto &Sum1, auto &Sum2, auto &Sum3,
auto &Sum4) {
size_t I = NDIt.get_global_id(0);
Sum1.combine(In1[I]);
Sum2.combine(In2[I]);
Sum3.combine(In3[I]);
Sum4.combine(In4[I]);
};
CGH.parallel_for<Name>(NDR, Redu1, Redu2, Redu3, Redu4, Lambda);
}).wait();
} else {
// Test ONEAPI reductions
Q.submit([&](handler &CGH) {
auto In1 = InBuf1.template get_access<access::mode::read>(CGH);
auto In2 = InBuf2.template get_access<access::mode::read>(CGH);
auto In3 = InBuf3.template get_access<access::mode::read>(CGH);
auto In4 = InBuf4.template get_access<access::mode::read>(CGH);

auto Out1 = OutBuf1.template get_access<Mode1>(CGH);
auto Out2 = OutBuf2.template get_access<Mode2>(CGH);
accessor<T3, 0, Mode3, access::target::global_buffer> Out3(OutBuf3, CGH);

auto Redu1 = ONEAPI::reduction(Out1, IdentityVal1, BOp1);
auto Redu2 = ONEAPI::reduction(Out2, IdentityVal2, BOp2);
auto Redu3 = ONEAPI::reduction(Out3, IdentityVal3, BOp3);
auto Redu4 = ONEAPI::reduction(Out4, IdentityVal4, BOp4);

auto Lambda = [=](nd_item<1> NDIt, auto &Sum1, auto &Sum2, auto &Sum3,
auto &Sum4) {
size_t I = NDIt.get_global_id(0);
Sum1.combine(In1[I]);
Sum2.combine(In2[I]);
Sum3.combine(In3[I]);
Sum4.combine(In4[I]);
};
CGH.parallel_for<Name>(NDR, Redu1, Redu2, Redu3, Redu4, Lambda);
}).wait();
}
Q.submit([&](handler &CGH) {
auto In1 = InBuf1.template get_access<access::mode::read>(CGH);
auto In2 = InBuf2.template get_access<access::mode::read>(CGH);
auto In3 = InBuf3.template get_access<access::mode::read>(CGH);
auto In4 = InBuf4.template get_access<access::mode::read>(CGH);

auto Redu1 =
createReduction<IsSYCL2020, Mode1>(OutBuf1, CGH, IdentityVal1, BOp1);
auto Redu2 =
createReduction<IsSYCL2020, Mode2>(OutBuf2, CGH, IdentityVal2, BOp2);
auto Redu3 =
createReduction<IsSYCL2020, Mode3>(OutBuf3, CGH, IdentityVal3, BOp3);
auto Redu4 = createReduction<IsSYCL2020, Mode4>(Out4, IdentityVal4, BOp4);

auto Lambda = [=](nd_item<1> NDIt, auto &Sum1, auto &Sum2, auto &Sum3,
auto &Sum4) {
size_t I = NDIt.get_global_id(0);
Sum1.combine(In1[I]);
Sum2.combine(In2[I]);
Sum3.combine(In3[I]);
Sum4.combine(In4[I]);
};
CGH.parallel_for<Name>(NDR, Redu1, Redu2, Redu3, Redu4, Lambda);
}).wait();

// Check the results and free memory.
int Error = 0;
int NumErrors = 0;
{
auto Out1 = OutBuf1.template get_access<access::mode::read>();
auto Out2 = OutBuf2.template get_access<access::mode::read>();
Expand All @@ -195,22 +151,23 @@ int testOne(queue &Q, T1 IdentityVal1, T1 InitVal1, BinaryOperation1 BOp1,
Out4Val = *Out4;
}

Error += cherkResultIsExpected(1, CorrectOut1, Out1[0], IsSYCL2020) ? 0 : 1;
Error += cherkResultIsExpected(2, CorrectOut2, Out2[0], IsSYCL2020) ? 0 : 1;
Error += cherkResultIsExpected(3, CorrectOut3, Out3[0], IsSYCL2020) ? 0 : 1;
Error += cherkResultIsExpected(4, CorrectOut4, Out4Val, IsSYCL2020) ? 0 : 1;
std::string AddInfo = "TestCase=";
NumErrors += checkResults(Q, IsSYCL2020, BOp1, NDR, Out1[0], CorrectOut1,
AddInfo + std::to_string(1));
NumErrors += checkResults(Q, IsSYCL2020, BOp2, NDR, Out2[0], CorrectOut2,
AddInfo + std::to_string(2));
NumErrors += checkResults(Q, IsSYCL2020, BOp3, NDR, Out3[0], CorrectOut3,
AddInfo + std::to_string(3));
NumErrors += checkResults(Q, IsSYCL2020, BOp4, NDR, Out4Val, CorrectOut4,
AddInfo + std::to_string(4));
free(Out4, Q.get_context());
}

if (Error)
std::cerr << "The test failed for nd_range(" << NWorkItems << "," << WGSize
<< ")\n\n";

return Error;
return NumErrors;
}

// Tests both implementations of reduction:
// sycl::reduction and sycl::ONEAPI::reduction
// sycl::reduction and sycl::ext::oneapi::reduction
template <class Name, typename T1, access::mode Mode1, typename T2,
access::mode Mode2, typename T3, access::mode Mode3, typename T4,
access::mode Mode4, class BinaryOperation1, class BinaryOperation2,
Expand All @@ -236,18 +193,16 @@ int testBoth(queue &Q, T1 IdentityVal1, T1 InitVal1, BinaryOperation1 BOp1,

int main() {
queue Q;
printDeviceInfo(Q);
int Error = testBoth<class Case1, float, DW, int, RW, short, RW, int, RW>(
Q, 0, 1000, std::plus<float>{}, 0, 2000, std::plus<>{}, 0, 4000,
Q, 0, 1000, std::plus<>{}, 0, 2000, std::plus<>{}, 0, 4000,
std::bit_or<>{}, 0, 8000, std::bit_xor<>{}, usm::alloc::shared, 16, 16);

auto Add = [](auto x, auto y) { return (x + y); };
Error += testBoth<class Case2, float, RW, int, RW, short, DW, int, DW>(
Q, 0, 1000, std::plus<float>{}, 0, 2000, std::plus<>{}, 0, 4000, Add, 0,
8000, std::plus<>{}, usm::alloc::device, 5 * (256 + 1), 5);
Q, 0, 1000, std::plus<>{}, 0, 2000, std::plus<>{}, 0, 4000, Add, 0, 8000,
std::plus<>{}, usm::alloc::device, 5 * (256 + 1), 5);

if (!Error)
std::cout << "Test passed\n";
else
std::cerr << Error << " test-cases failed\n";
printFinalStatus(Error);
return Error;
}
Loading