-
Notifications
You must be signed in to change notification settings - Fork 131
[SYCL][ESIMD] Add simd::copy* E2E tests. #236
Changes from all commits
5ec880d
4b53e5b
85198b0
561a089
550326f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,181 @@ | ||
//==------- simd_memory_access.cpp - DPC++ ESIMD on-device test -----------==// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
// REQUIRES: gpu | ||
// UNSUPPORTED: cuda | ||
// RUN: %clangxx -fsycl %s -o %t.out | ||
// RUN: %GPU_RUN_PLACEHOLDER %t.out | ||
// | ||
// The test checks functionality of the memory access APIs which are members of | ||
// the simd class. | ||
|
||
#include "../esimd_test_utils.hpp" | ||
|
||
#include <CL/sycl.hpp> | ||
#include <sycl/ext/intel/experimental/esimd.hpp> | ||
|
||
#include <iostream> | ||
|
||
using namespace cl::sycl; | ||
using namespace sycl::ext::intel::experimental::esimd; | ||
|
||
template <typename T> | ||
using Acc = | ||
accessor<T, 1, access_mode::read_write, access::target::global_buffer>; | ||
|
||
template <typename T, int N, bool IsAcc> struct Kernel; | ||
|
||
// Accessor-based kernel. | ||
template <typename T, int N> struct Kernel<T, N, true> { | ||
Acc<T> acc; | ||
Kernel(Acc<T> acc) : acc(acc) {} | ||
|
||
void operator()(id<1> i) const SYCL_ESIMD_KERNEL { | ||
const uint32_t ii = static_cast<uint32_t>(i.get(0)); | ||
simd<T, N> v; | ||
const auto offset = ii * sizeof(v); | ||
v.copy_from(acc, offset); | ||
v += simd<T, N>(ii * N, 1); | ||
v.copy_to(acc, offset); | ||
} | ||
}; | ||
|
||
// Pointer-based kernel. | ||
template <typename T, int N> struct Kernel<T, N, false> { | ||
T *ptr; | ||
Kernel(T *ptr) : ptr(ptr) {} | ||
|
||
void operator()(id<1> i) const SYCL_ESIMD_KERNEL { | ||
const uint32_t ii = static_cast<uint32_t>(i.get(0)); | ||
simd<T, N> v; | ||
const auto offset = ii * (sizeof(v) / sizeof(T)); | ||
v.copy_from(ptr + offset); | ||
v += simd<T, N>(ii * N, 1); | ||
v.copy_to(ptr + offset); | ||
} | ||
}; | ||
|
||
template <typename T> struct char_to_int { | ||
using type = typename std::conditional< | ||
sizeof(T) == 1, | ||
typename std::conditional<std::is_signed<T>::value, int, unsigned>::type, | ||
T>::type; | ||
}; | ||
|
||
template <bool IsAcc, typename T> void free_mem(T *ptr, sycl::queue q) { | ||
if constexpr (IsAcc) { | ||
delete[] ptr; | ||
} else { | ||
sycl::free(ptr, q); | ||
} | ||
} | ||
|
||
// The main test routine. | ||
template <typename T, int N, bool IsAcc> bool test(queue q, size_t size) { | ||
std::cout << "Testing T=" << typeid(T).name() << ", N=" << N | ||
<< " using accessor=" << IsAcc << "...\n"; | ||
T *A; | ||
if constexpr (IsAcc) { | ||
A = new T[size]; | ||
} else { | ||
A = reinterpret_cast<T *>(sycl::malloc_shared(size, q)); | ||
} | ||
|
||
for (unsigned i = 0; i < size; ++i) { | ||
A[i] = i; // should not be zero to test `copy_from` really works | ||
} | ||
|
||
try { | ||
if constexpr (IsAcc) { | ||
buffer<T, 1> buf(A, range<1>(size)); | ||
range<1> glob_range{size / N}; | ||
|
||
auto e = q.submit([&](handler &cgh) { | ||
auto acc = buf.template get_access<access::mode::read_write>(cgh); | ||
Kernel<T, N, true> kernel(acc); | ||
cgh.parallel_for(glob_range, kernel); | ||
}); | ||
} else { | ||
range<1> glob_range{size / N}; | ||
|
||
auto e = q.submit([&](handler &cgh) { | ||
Kernel<T, N, false> kernel(A); | ||
cgh.parallel_for(glob_range, kernel); | ||
}); | ||
} | ||
q.wait_and_throw(); | ||
} catch (cl::sycl::exception const &e) { | ||
std::cout << "SYCL exception caught: " << e.what() << '\n'; | ||
free_mem<IsAcc>(A, q); | ||
return e.get_cl_code(); | ||
} | ||
|
||
int err_cnt = 0; | ||
|
||
for (unsigned i = 0; i < size; ++i) { | ||
T gold = (T)(i * 2); | ||
T val = A[i]; | ||
|
||
if (val != gold) { | ||
if (++err_cnt < 10) { | ||
using T1 = typename char_to_int<T>::type; | ||
std::cout << "failed at index " << i << ": " << (T1)val | ||
<< " != " << (T1)gold << " (gold)\n"; | ||
} | ||
} | ||
} | ||
if (err_cnt > 0) { | ||
std::cout << " pass rate: " | ||
<< ((float)(size - err_cnt) / (float)size) * 100.0f << "% (" | ||
<< (size - err_cnt) << "/" << size << ")\n"; | ||
} | ||
|
||
free_mem<IsAcc>(A, q); | ||
|
||
std::cout << (err_cnt > 0 ? " FAILED\n" : " Passed\n"); | ||
return err_cnt > 0 ? false : true; | ||
} | ||
|
||
int main(int argc, char **argv) { | ||
size_t size = 32 * 7; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it intentionally There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes |
||
|
||
if (argc > 1) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need that code? Is it left intentionally for debugging purposes? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it is left intentionally, to be able to run with different size. |
||
size = atoi(argv[1]); | ||
size = size == 0 ? 128 : size; | ||
} | ||
if (size % 32 != 0) { | ||
std::cerr << "*** ERROR: size (" << size << ") must be a multiple of 32\n"; | ||
return 2; | ||
} | ||
std::cout << "Using size=" << size << "\n"; | ||
queue q(esimd_test::ESIMDSelector{}, esimd_test::createExceptionHandler()); | ||
|
||
auto dev = q.get_device(); | ||
std::cout << "Running on " << dev.get_info<info::device::name>() << "\n"; | ||
|
||
bool passed = true; | ||
passed &= test<char, 32, true>(q, size); | ||
passed &= test<unsigned char, 16, true>(q, size); | ||
passed &= test<short, 32, true>(q, size); | ||
passed &= test<short, 16, true>(q, size); | ||
passed &= test<unsigned short, 8, true>(q, size); | ||
passed &= test<int, 32, true>(q, size); | ||
passed &= test<unsigned int, 32, true>(q, size); | ||
passed &= test<float, 32, true>(q, size); | ||
|
||
passed &= test<char, 32, false>(q, size); | ||
passed &= test<unsigned char, 16, false>(q, size); | ||
passed &= test<short, 32, false>(q, size); | ||
passed &= test<short, 16, false>(q, size); | ||
passed &= test<unsigned short, 8, false>(q, size); | ||
passed &= test<int, 32, false>(q, size); | ||
passed &= test<unsigned int, 32, false>(q, size); | ||
passed &= test<float, 32, false>(q, size); | ||
|
||
std::cout << (passed ? "=== Test passed\n" : "=== Test FAILED\n"); | ||
return passed ? 0 : 1; | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: would it become more readable if we just split USM- and accessor-based tests into separate files? Some There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, it might. But I'd prefer to have this in a single file to avoid code duplication and reduce maintenance burden. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We copy the source data into the original destination. If the
copy_from
andcopy_to
would be unimplemented the test will pass as well. Maybe we can test introduce a separate destination buffer and test copyingA -> B
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That is a good point!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually, not quite. The test does:
v.copy_from(ptr + offset); v += simd<T, N>(ii * N, 1); v.copy_to(ptr + offset);
I.e. the written value depends both on the original value. So if either does not work test should fail. But the choice of '0' as the initial value is bad indeed, as there is a chance
v
can be initialized to zeroes even ifcopy_from
does not work. I'll fix that.Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I mean, the point is still good :), as it lead to problem discovery, but it is not fully valid.