diff --git a/SYCL/SubGroup/sub_group_as.cpp b/SYCL/SubGroup/sub_group_as.cpp new file mode 100644 index 0000000000..0fce33ba48 --- /dev/null +++ b/SYCL/SubGroup/sub_group_as.cpp @@ -0,0 +1,69 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// Sub-groups are not suported on Host +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +#include +#include +#include +#include +#include + +int main(int argc, char *argv[]) { + cl::sycl::queue queue; + printf("Device Name = %s\n", + queue.get_device().get_info().c_str()); + + // Initialize some host memory + constexpr int N = 64; + int host_mem[N]; + for (int i = 0; i < N; ++i) { + host_mem[i] = i * 100; + } + + // Use the device to transform each value + { + cl::sycl::buffer buf(host_mem, N); + queue.submit([&](cl::sycl::handler &cgh) { + auto global = + buf.get_access(cgh); + sycl::accessor + local(N, cgh); + + cgh.parallel_for( + cl::sycl::nd_range<1>(N, 32), [=](cl::sycl::nd_item<1> it) { + cl::sycl::ONEAPI::sub_group sg = it.get_sub_group(); + if (!it.get_local_id(0)) { + int end = it.get_global_id(0) + it.get_local_range()[0]; + for (int i = it.get_global_id(0); i < end; i++) { + local[i] = i; + } + } + it.barrier(); + + int i = (it.get_global_id(0) / sg.get_max_local_range()[0]) * + sg.get_max_local_range()[0]; + // Global address space + auto x = sg.load(&global[i]); + + // Local address space + auto y = sg.load(&local[i]); + + sg.store(&global[i], x + y); + }); + }); + } + + // Print results and tidy up + for (int i = 0; i < N; ++i) { + if (i * 101 != host_mem[i]) { + printf("Unexpected result %04d vs %04d\n", i * 101, host_mem[i]); + return 1; + } + } + printf("Success!\n"); + return 0; +} diff --git a/SYCL/SubGroup/sub_group_as_vec.cpp b/SYCL/SubGroup/sub_group_as_vec.cpp new file mode 100644 index 0000000000..b37af3363f --- /dev/null +++ b/SYCL/SubGroup/sub_group_as_vec.cpp @@ -0,0 +1,71 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// Sub-groups are not suported on Host +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +#include +#include +#include +#include +#include + +int main(int argc, char *argv[]) { + cl::sycl::queue queue; + printf("Device Name = %s\n", + queue.get_device().get_info().c_str()); + + // Initialize some host memory + constexpr int N = 64; + sycl::vec host_mem[N]; + for (int i = 0; i < N; ++i) { + host_mem[i].s0() = i; + host_mem[i].s1() = 0; + } + + // Use the device to transform each value + { + cl::sycl::buffer, 1> buf(host_mem, N); + queue.submit([&](cl::sycl::handler &cgh) { + auto global = + buf.get_access(cgh); + sycl::accessor, 1, sycl::access::mode::read_write, + sycl::access::target::local> + local(N, cgh); + cgh.parallel_for( + cl::sycl::nd_range<1>(N, 32), [=](cl::sycl::nd_item<1> it) { + cl::sycl::ONEAPI::sub_group sg = it.get_sub_group(); + if (!it.get_local_id(0)) { + int end = it.get_global_id(0) + it.get_local_range()[0]; + for (int i = it.get_global_id(0); i < end; i++) { + local[i].s0() = 0; + local[i].s1() = i; + } + } + it.barrier(); + + int i = (it.get_global_id(0) / sg.get_max_local_range()[0]) * + sg.get_max_local_range()[0]; + // Global address space + auto x = sg.load(&global[i]); + + // Local address space + auto y = sg.load(&local[i]); + + sg.store(&global[i], x + y); + }); + }); + } + + // Print results and tidy up + for (int i = 0; i < N; ++i) { + if (i != host_mem[i].s0() || i != host_mem[i].s1()) { + printf("Unexpected result [%02d,%02d] vs [%02d,%02d]\n", i, i, + host_mem[i].s0(), host_mem[i].s1()); + return 1; + } + } + printf("Success!\n"); + return 0; +} diff --git a/SYCL/lit.cfg.py b/SYCL/lit.cfg.py index 947d88607f..921b93acbd 100644 --- a/SYCL/lit.cfg.py +++ b/SYCL/lit.cfg.py @@ -280,7 +280,7 @@ if config.sycl_be == 'cuda': config.substitutions.append( ('%sycl_triple', "nvptx64-nvidia-cuda-sycldevice" ) ) else: - config.substitutions.append( ('%sycl_triple', "spir64-unknown-linux-sycldevice" ) ) + config.substitutions.append( ('%sycl_triple', "spir64-unknown-unknown-sycldevice" ) ) if find_executable('sycl-ls'): config.available_features.add('sycl-ls') diff --git a/SYCL/lit.site.cfg.py.in b/SYCL/lit.site.cfg.py.in index a401384be9..9916def273 100644 --- a/SYCL/lit.site.cfg.py.in +++ b/SYCL/lit.site.cfg.py.in @@ -14,8 +14,6 @@ config.sycl_tools_dir = config.llvm_tools_dir config.sycl_include = os.path.join(config.dpcpp_root_dir, 'include', 'sycl') config.sycl_obj_root = "@CMAKE_CURRENT_BINARY_DIR@" config.sycl_libs_dir = os.path.join(config.dpcpp_root_dir, ('bin' if platform.system() == "Windows" else 'lib')) -config.target_triple = "x86_64-unknown-unknown-gnu" -config.host_triple = "x86_64-unknown-unknown-gnu" config.opencl_libs_dir = (os.path.dirname("@OpenCL_LIBRARY@") if "@OpenCL_LIBRARY@" else "") config.level_zero_libs_dir = "@LEVEL_ZERO_LIBS_DIR@"