diff --git a/sycl/doc/extensions/SubGroupAlgorithms/SYCL_INTEL_sub_group_algorithms.asciidoc b/sycl/doc/extensions/SubGroupAlgorithms/SYCL_INTEL_sub_group_algorithms.asciidoc index af7ed1771f393..e414430bda03f 100755 --- a/sycl/doc/extensions/SubGroupAlgorithms/SYCL_INTEL_sub_group_algorithms.asciidoc +++ b/sycl/doc/extensions/SubGroupAlgorithms/SYCL_INTEL_sub_group_algorithms.asciidoc @@ -134,12 +134,18 @@ The load and store sub-group functions enable developers to assert that all work |=== |Function|Description +|+template T load(sub_group sg, const T *src)+ +|Load contiguous data from _src_. Returns one element per work-item, corresponding to the memory location at _src_ + +get_local_id()+. The value of _src_ must be the same for all work-items in the sub-group. The address space information is deduced automatically. Only pointers to global and local address spaces are valid. Passing a pointer to other address spaces will cause the run time assertion. + |+template T load(sub_group sg, const multi_ptr src)+ |Load contiguous data from _src_. Returns one element per work-item, corresponding to the memory location at _src_ + +get_local_id()+. The value of _src_ must be the same for all work-items in the sub-group. _Space_ must be +access::address_space::global_space+ or +access::address_space::local_space+. |+template vec load(sub_group sg, const multi_ptr src)+ |Load contiguous data from _src_. Returns _N_ elements per work-item, corresponding to the _N_ memory locations at _src_ + +i+ * +get_max_local_range()+ + +get_local_id()+ for +i+ between 0 and _N_. The value of _src_ must be the same for all work-items in the sub-group. _Space_ must be +access::address_space::global_space+ or +access::address_space::local_space+. +|+template void store(sub_group sg, T *dst, const T& x)+ +|Store contiguous data to _dst_. The value of _x_ from each work-item is written to the memory location at _dst_ + +get_local_id()+. The value of _dst_ must be the same for all work-items in the sub-group. The address space information is deduced automatically. Only pointers to global and local address spaces are valid. Passing a pointer to other address spaces will cause the run time assertion. + |+template void store(sub_group sg, multi_ptr dst, const T& x)+ |Store contiguous data to _dst_. The value of _x_ from each work-item is written to the memory location at _dst_ + +get_local_id()+. The value of _dst_ must be the same for all work-items in the sub-group. _Space_ must be +access::address_space::global_space+ or +access::address_space::local_space+. @@ -165,6 +171,7 @@ None. |======================================== |Rev|Date|Author|Changes |1|2020-03-16|John Pennycook|*Initial public working draft* +|2|2021-02-26|Vladimir Lazarev|*Add load/store method for raw pointers* |======================================== //************************************************************************ diff --git a/sycl/include/CL/__spirv/spirv_ops.hpp b/sycl/include/CL/__spirv/spirv_ops.hpp index dc8515f0bbe05..8de17670f1e21 100644 --- a/sycl/include/CL/__spirv/spirv_ops.hpp +++ b/sycl/include/CL/__spirv/spirv_ops.hpp @@ -170,6 +170,30 @@ __SPIRV_ATOMICS(__SPIRV_ATOMIC_UNSIGNED, unsigned long long) __SPIRV_ATOMICS(__SPIRV_ATOMIC_MINMAX, Min) __SPIRV_ATOMICS(__SPIRV_ATOMIC_MINMAX, Max) +extern SYCL_EXTERNAL __attribute__((opencl_global)) void * +__spirv_GenericCastToPtrExplicit_ToGlobal(const void *Ptr, + __spv::StorageClass::Flag S) noexcept; + +extern SYCL_EXTERNAL __attribute__((opencl_local)) void * +__spirv_GenericCastToPtrExplicit_ToLocal(const void *Ptr, + __spv::StorageClass::Flag S) noexcept; + +template +extern __attribute__((opencl_global)) dataT * +__spirv_GenericCastToPtrExplicit_ToGlobal( + const void *Ptr, __spv::StorageClass::Flag S) noexcept { + return (__attribute__((opencl_global)) + dataT *)__spirv_GenericCastToPtrExplicit_ToGlobal(Ptr, S); +} + +template +extern __attribute__((opencl_local)) dataT * +__spirv_GenericCastToPtrExplicit_ToLocal(const void *Ptr, + __spv::StorageClass::Flag S) noexcept { + return (__attribute__((opencl_local)) + dataT *)__spirv_GenericCastToPtrExplicit_ToLocal(Ptr, S); +} + template __SYCL_CONVERGENT__ extern SYCL_EXTERNAL dataT __spirv_SubgroupShuffleINTEL(dataT Data, uint32_t InvocationId) noexcept; diff --git a/sycl/include/CL/__spirv/spirv_types.hpp b/sycl/include/CL/__spirv/spirv_types.hpp index a938ae0732da0..15564c11a59dc 100644 --- a/sycl/include/CL/__spirv/spirv_types.hpp +++ b/sycl/include/CL/__spirv/spirv_types.hpp @@ -36,6 +36,46 @@ struct Scope { Flag flag_value; }; +struct StorageClass { + enum Flag : uint32_t { + UniformConstant = 0, + Input = 1, + Uniform = 2, + Output = 3, + Workgroup = 4, + CrossWorkgroup = 5, + Private = 6, + Function = 7, + Generic = 8, + PushConstant = 9, + AtomicCounter = 10, + Image = 11, + StorageBuffer = 12, + CallableDataKHR = 5328, + CallableDataNV = 5328, + IncomingCallableDataKHR = 5329, + IncomingCallableDataNV = 5329, + RayPayloadKHR = 5338, + RayPayloadNV = 5338, + HitAttributeKHR = 5339, + HitAttributeNV = 5339, + IncomingRayPayloadKHR = 5342, + IncomingRayPayloadNV = 5342, + ShaderRecordBufferKHR = 5343, + ShaderRecordBufferNV = 5343, + PhysicalStorageBuffer = 5349, + PhysicalStorageBufferEXT = 5349, + CodeSectionINTEL = 5605, + CapabilityUSMStorageClassesINTEL = 5935, + DeviceOnlyINTEL = 5936, + HostOnlyINTEL = 5937, + Max = 0x7fffffff, + }; + constexpr StorageClass(Flag flag) : flag_value(flag) {} + constexpr operator uint32_t() const { return flag_value; } + Flag flag_value; +}; + struct MemorySemanticsMask { enum Flag : uint32_t { diff --git a/sycl/include/CL/sycl/ONEAPI/sub_group.hpp b/sycl/include/CL/sycl/ONEAPI/sub_group.hpp index 71ec92f176b47..121d94a5141a2 100644 --- a/sycl/include/CL/sycl/ONEAPI/sub_group.hpp +++ b/sycl/include/CL/sycl/ONEAPI/sub_group.hpp @@ -224,6 +224,47 @@ struct sub_group { /* --- sub_group load/stores --- */ /* these can map to SIMD or block read/write hardware where available */ +#ifdef __SYCL_DEVICE_ONLY__ + // Method for decorated pointer + template + detail::enable_if_t< + !std::is_same::type, T>::value, T> + load(T *src) const { + return load(sycl::multi_ptr::type, + sycl::detail::deduce_AS::value>( + (typename detail::remove_AS::type *)src)); + } + + // Method for raw pointer + template + detail::enable_if_t< + std::is_same::type, T>::value, T> + load(T *src) const { + +#ifdef __NVPTX__ + return src[get_local_id()[0]]; +#else // __NVPTX__ + auto l = __spirv_GenericCastToPtrExplicit_ToLocal( + src, __spv::StorageClass::Workgroup); + if (l) + return load(l); + + auto g = __spirv_GenericCastToPtrExplicit_ToGlobal( + src, __spv::StorageClass::CrossWorkgroup); + if (g) + return load(g); + + assert(!"Sub-group load() is supported for local or global pointers only."); + return {}; +#endif // __NVPTX__ + } +#else //__SYCL_DEVICE_ONLY__ + template T load(T *src) const { + (void)src; + throw runtime_error("Sub-groups are not supported on host device.", + PI_INVALID_DEVICE); + } +#endif //__SYCL_DEVICE_ONLY__ template sycl::detail::enable_if_t< @@ -315,6 +356,55 @@ struct sub_group { #endif } +#ifdef __SYCL_DEVICE_ONLY__ + // Method for decorated pointer + template + detail::enable_if_t< + !std::is_same::type, T>::value> + store(T *dst, const typename detail::remove_AS::type &x) const { + store(sycl::multi_ptr::type, + sycl::detail::deduce_AS::value>( + (typename detail::remove_AS::type *)dst), + x); + } + + // Method for raw pointer + template + detail::enable_if_t< + std::is_same::type, T>::value> + store(T *dst, const typename detail::remove_AS::type &x) const { + +#ifdef __NVPTX__ + dst[get_local_id()[0]] = x; +#else // __NVPTX__ + auto l = __spirv_GenericCastToPtrExplicit_ToLocal( + dst, __spv::StorageClass::Workgroup); + if (l) { + store(l, x); + return; + } + + auto g = __spirv_GenericCastToPtrExplicit_ToGlobal( + dst, __spv::StorageClass::CrossWorkgroup); + if (g) { + store(g, x); + return; + } + + assert( + !"Sub-group store() is supported for local or global pointers only."); + return; +#endif // __NVPTX__ + } +#else //__SYCL_DEVICE_ONLY__ + template void store(T *dst, const T &x) const { + (void)dst; + (void)x; + throw runtime_error("Sub-groups are not supported on host device.", + PI_INVALID_DEVICE); + } +#endif //__SYCL_DEVICE_ONLY__ + template sycl::detail::enable_if_t< sycl::detail::sub_group::AcceptableForGlobalLoadStore::value> diff --git a/sycl/include/CL/sycl/access/access.hpp b/sycl/include/CL/sycl/access/access.hpp index fbca743baaa14..8f4b9a92804e8 100644 --- a/sycl/include/CL/sycl/access/access.hpp +++ b/sycl/include/CL/sycl/access/access.hpp @@ -187,18 +187,16 @@ template struct DecoratedType { using type = __OPENCL_LOCAL_AS__ ElementType; }; - -template -struct remove_AS { - typedef T type; -}; +template struct remove_AS { typedef T type; }; #ifdef __SYCL_DEVICE_ONLY__ -template -struct remove_AS<__OPENCL_GLOBAL_AS__ T> { - typedef T type; +template struct deduce_AS { + static_assert(!std::is_same::type, T>::value, + "Only types with address space attributes are supported"); }; +template struct remove_AS<__OPENCL_GLOBAL_AS__ T> { typedef T type; }; + #ifdef __ENABLE_USM_ADDR_SPACE__ template struct remove_AS<__OPENCL_GLOBAL_DEVICE_AS__ T> { typedef T type; @@ -207,21 +205,45 @@ template struct remove_AS<__OPENCL_GLOBAL_DEVICE_AS__ T> { template struct remove_AS<__OPENCL_GLOBAL_HOST_AS__ T> { typedef T type; }; + +template struct deduce_AS<__OPENCL_GLOBAL_DEVICE_AS__ T> { + static const access::address_space value = + access::address_space::global_device_space; +}; + +template struct deduce_AS<__OPENCL_GLOBAL_HOST_AS__ T> { + static const access::address_space value = + access::address_space::global_host_space; +}; #endif // __ENABLE_USM_ADDR_SPACE__ -template -struct remove_AS<__OPENCL_PRIVATE_AS__ T> { +template struct remove_AS<__OPENCL_PRIVATE_AS__ T> { typedef T type; }; -template -struct remove_AS<__OPENCL_LOCAL_AS__ T> { +template struct remove_AS<__OPENCL_LOCAL_AS__ T> { typedef T type; }; + +template struct remove_AS<__OPENCL_CONSTANT_AS__ T> { typedef T type; }; -template -struct remove_AS<__OPENCL_CONSTANT_AS__ T> { - typedef T type; +template struct deduce_AS<__OPENCL_GLOBAL_AS__ T> { + static const access::address_space value = + access::address_space::global_space; +}; + +template struct deduce_AS<__OPENCL_PRIVATE_AS__ T> { + static const access::address_space value = + access::address_space::private_space; +}; + +template struct deduce_AS<__OPENCL_LOCAL_AS__ T> { + static const access::address_space value = access::address_space::local_space; +}; + +template struct deduce_AS<__OPENCL_CONSTANT_AS__ T> { + static const access::address_space value = + access::address_space::constant_space; }; #endif @@ -231,8 +253,7 @@ struct remove_AS<__OPENCL_CONSTANT_AS__ T> { #undef __OPENCL_LOCAL_AS__ #undef __OPENCL_CONSTANT_AS__ #undef __OPENCL_PRIVATE_AS__ - } // namespace detail -} // namespace sycl -} // __SYCL_INLINE_NAMESPACE(cl) +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/test/CMakeLists.txt b/sycl/test/CMakeLists.txt index ec833e14f13dc..a2a12c8797a76 100644 --- a/sycl/test/CMakeLists.txt +++ b/sycl/test/CMakeLists.txt @@ -54,7 +54,7 @@ set_target_properties(check-sycl-deploy PROPERTIES FOLDER "SYCL tests") add_lit_testsuite(check-sycl-spirv "Running device-agnostic SYCL regression tests for SPIR-V" ${CMAKE_CURRENT_BINARY_DIR} ARGS ${RT_TEST_ARGS} - PARAMS "SYCL_TRIPLE=spir64-unknown-linux-sycldevice" + PARAMS "SYCL_TRIPLE=spir64-unknown-unknown-sycldevice" DEPENDS ${SYCL_TEST_DEPS} EXCLUDE_FROM_CHECK_ALL ) diff --git a/sycl/test/basic_tests/address_space_traits.cpp b/sycl/test/basic_tests/address_space_traits.cpp new file mode 100644 index 0000000000000..92f345e87fd80 --- /dev/null +++ b/sycl/test/basic_tests/address_space_traits.cpp @@ -0,0 +1,38 @@ +// RUN: %clangxx -fsycl -fsycl-device-only -D__ENABLE_USM_ADDR_SPACE__ -fsycl-targets=%sycl_triple %s -c + +#include +#include + +using namespace cl::sycl; +int main() { + + queue myQueue; + myQueue.submit([&](handler &cgh) { + cgh.single_task([=]() { + static_assert( + detail::deduce_AS<__attribute__((opencl_global)) int>::value == + access::address_space::global_space, + "Unexpected address space"); + static_assert( + detail::deduce_AS<__attribute__((opencl_local)) int>::value == + access::address_space::local_space, + "Unexpected address space"); + static_assert( + detail::deduce_AS<__attribute__((opencl_private)) int>::value == + access::address_space::private_space, + "Unexpected address space"); + static_assert( + detail::deduce_AS<__attribute__((opencl_constant)) int>::value == + access::address_space::constant_space, + "Unexpected address space"); + static_assert( + detail::deduce_AS<__attribute__((opencl_global_device)) int>::value == + access::address_space::global_device_space, + "Unexpected address space"); + static_assert( + detail::deduce_AS<__attribute__((opencl_global_host)) int>::value == + access::address_space::global_host_space, + "Unexpected address space"); + }); + }); +} diff --git a/sycl/test/extensions/sub_group_as.cpp b/sycl/test/extensions/sub_group_as.cpp new file mode 100644 index 0000000000000..bb515a7f23042 --- /dev/null +++ b/sycl/test/extensions/sub_group_as.cpp @@ -0,0 +1,88 @@ +// RUN: %clangxx -fsycl -fsycl-device-only -O3 -S -emit-llvm -x c++ %s -o - | FileCheck %s + +#include +#include +#include +#include +#include + +int main(int argc, char *argv[]) { + cl::sycl::queue queue; + printf("Device Name = %s\n", + queue.get_device().get_info().c_str()); + + // Initialize some host memory + constexpr int N = 64; + int host_mem[N]; + for (int i = 0; i < N; ++i) { + host_mem[i] = i * 100; + } + + // Use the device to transform each value + { + cl::sycl::buffer buf(host_mem, N); + queue.submit([&](cl::sycl::handler &cgh) { + auto global = + buf.get_access(cgh); + sycl::accessor + local(N, cgh); + + cgh.parallel_for( + cl::sycl::nd_range<1>(N, 32), [=](cl::sycl::nd_item<1> it) { + int v[N] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, + 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, + 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}; + cl::sycl::ONEAPI::sub_group sg = it.get_sub_group(); + if (!it.get_local_id(0)) { + int end = it.get_global_id(0) + it.get_local_range()[0]; + for (int i = it.get_global_id(0); i < end; i++) { + local[i] = i; + } + } + // CHECK: call void {{.*}}spirv_ControlBarrierjjj + it.barrier(); + + int i = (it.get_global_id(0) / sg.get_max_local_range()[0]) * + sg.get_max_local_range()[0]; + + // load for global address space + // CHECK: call spir_func i8 addrspace(3)* {{.*}}spirv_GenericCastToPtrExplicit_ToLocal{{.*}}(i8 addrspace(4)* + // CHECK: {{.*}}SubgroupLocalInvocationId + // CHECK: call spir_func i8 addrspace(1)* {{.*}}spirv_GenericCastToPtrExplicit_ToGlobal{{.*}}(i8 addrspace(4)* + // CHECK: call spir_func i32 {{.*}}spirv_SubgroupBlockRead{{.*}}(i32 addrspace(1)* + // CHECK: call spir_func void {{.*}}assert + auto x = sg.load(&global[i]); + + // load() for local address space + // CHECK: call spir_func i8 addrspace(3)* {{.*}}spirv_GenericCastToPtrExplicit_ToLocal{{.*}}(i8 addrspace(4)* + // CHECK: {{.*}}SubgroupLocalInvocationId + // CHECK: call spir_func i8 addrspace(1)* {{.*}}spirv_GenericCastToPtrExplicit_ToGlobal{{.*}}(i8 addrspace(4)* + // CHECK: call spir_func i32 {{.*}}spirv_SubgroupBlockRead{{.*}}(i32 addrspace(1)* + // CHECK: call spir_func void {{.*}}assert + auto y = sg.load(&local[i]); + + // load() for private address space + // CHECK: call spir_func i8 addrspace(3)* {{.*}}spirv_GenericCastToPtrExplicit_ToLocal{{.*}}(i8 addrspace(4)* + // CHECK: {{.*}}SubgroupLocalInvocationId + // CHECK: call spir_func i8 addrspace(1)* {{.*}}spirv_GenericCastToPtrExplicit_ToGlobal{{.*}}(i8 addrspace(4)* + // CHECK: call spir_func i32 {{.*}}spirv_SubgroupBlockRead{{.*}}(i32 addrspace(1)* + // CHECK: call spir_func void {{.*}}assert + auto z = sg.load(v + i); + + // store() for global address space + // CHECK: call spir_func i8 addrspace(3)* {{.*}}spirv_GenericCastToPtrExplicit_ToLocal{{.*}}(i8 addrspace(4)* + // CHECK: {{.*}}SubgroupLocalInvocationId + // CHECK: call spir_func i8 addrspace(1)* {{.*}}spirv_GenericCastToPtrExplicit_ToGlobal{{.*}}(i8 addrspace(4)* + // CHECK: call spir_func void {{.*}}spirv_SubgroupBlockWriteINTEL{{.*}}(i32 addrspace(1)* + // CHECK: call spir_func void {{.*}}assert + sg.store(&global[i], x + y + z); + }); + }); + } + + return 0; +} diff --git a/sycl/test/lit.cfg.py b/sycl/test/lit.cfg.py index e1d2982a2b13e..d8dd5611d9f8a 100644 --- a/sycl/test/lit.cfg.py +++ b/sycl/test/lit.cfg.py @@ -84,7 +84,7 @@ # Every SYCL implementation provides a host implementation. config.available_features.add('host') -triple=lit_config.params.get('SYCL_TRIPLE', 'spir64-unknown-linux-sycldevice') +triple=lit_config.params.get('SYCL_TRIPLE', 'spir64-unknown-unknown-sycldevice') lit_config.note("Triple: {}".format(triple)) config.substitutions.append( ('%sycl_triple', triple ) ) diff --git a/sycl/test/on-device/extensions/sub_group_as.cpp b/sycl/test/on-device/extensions/sub_group_as.cpp new file mode 100644 index 0000000000000..bea285f2a2e16 --- /dev/null +++ b/sycl/test/on-device/extensions/sub_group_as.cpp @@ -0,0 +1,70 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// Sub-groups are not suported on Host +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// Execution on CPU and FPGA takes 100000 times longer +// RUNx: %CPU_RUN_PLACEHOLDER %t.out +// RUNx: %ACC_RUN_PLACEHOLDER %t.out + +#include +#include +#include +#include +#include + +int main(int argc, char *argv[]) { + cl::sycl::queue queue; + printf("Device Name = %s\n", + queue.get_device().get_info().c_str()); + + // Initialize some host memory + constexpr int N = 64; + int host_mem[N]; + for (int i = 0; i < N; ++i) { + host_mem[i] = i * 100; + } + + // Use the device to transform each value + { + cl::sycl::buffer buf(host_mem, N); + queue.submit([&](cl::sycl::handler &cgh) { + auto global = + buf.get_access(cgh); + sycl::accessor + local(N, cgh); + + cgh.parallel_for( + cl::sycl::nd_range<1>(N, 32), [=](cl::sycl::nd_item<1> it) { + cl::sycl::ONEAPI::sub_group sg = it.get_sub_group(); + if (!it.get_local_id(0)) { + int end = it.get_global_id(0) + it.get_local_range()[0]; + for (int i = it.get_global_id(0); i < end; i++) { + local[i] = i; + } + } + it.barrier(); + + int i = (it.get_global_id(0) / sg.get_max_local_range()[0]) * + sg.get_max_local_range()[0]; + // Global address space + auto x = sg.load(&global[i]); + + // Local address space + auto y = sg.load(&local[i]); + + sg.store(&global[i], x + y); + }); + }); + } + + // Print results and tidy up + for (int i = 0; i < N; ++i) { + if (i * 101 != host_mem[i]) { + printf("Unexpected result %04d vs %04d\n", i * 101, host_mem[i]); + return 1; + } + } + printf("Success!\n"); + return 0; +} diff --git a/sycl/test/on-device/extensions/sub_group_as_vec.cpp b/sycl/test/on-device/extensions/sub_group_as_vec.cpp new file mode 100644 index 0000000000000..8f2e82e20e402 --- /dev/null +++ b/sycl/test/on-device/extensions/sub_group_as_vec.cpp @@ -0,0 +1,72 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// Sub-groups are not suported on Host +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// Execution on CPU and FPGA takes 100000 times longer +// RUNx: %CPU_RUN_PLACEHOLDER %t.out +// RUNx: %ACC_RUN_PLACEHOLDER %t.out + +#include +#include +#include +#include +#include + +int main(int argc, char *argv[]) { + cl::sycl::queue queue; + printf("Device Name = %s\n", + queue.get_device().get_info().c_str()); + + // Initialize some host memory + constexpr int N = 64; + sycl::vec host_mem[N]; + for (int i = 0; i < N; ++i) { + host_mem[i].s0() = i; + host_mem[i].s1() = 0; + } + + // Use the device to transform each value + { + cl::sycl::buffer, 1> buf(host_mem, N); + queue.submit([&](cl::sycl::handler &cgh) { + auto global = + buf.get_access(cgh); + sycl::accessor, 1, sycl::access::mode::read_write, + sycl::access::target::local> + local(N, cgh); + cgh.parallel_for( + cl::sycl::nd_range<1>(N, 32), [=](cl::sycl::nd_item<1> it) { + cl::sycl::ONEAPI::sub_group sg = it.get_sub_group(); + if (!it.get_local_id(0)) { + int end = it.get_global_id(0) + it.get_local_range()[0]; + for (int i = it.get_global_id(0); i < end; i++) { + local[i].s0() = 0; + local[i].s1() = i; + } + } + it.barrier(); + + int i = (it.get_global_id(0) / sg.get_max_local_range()[0]) * + sg.get_max_local_range()[0]; + // Global address space + auto x = sg.load(&global[i]); + + // Local address space + auto y = sg.load(&local[i]); + + sg.store(&global[i], x + y); + }); + }); + } + + // Print results and tidy up + for (int i = 0; i < N; ++i) { + if (i != host_mem[i].s0() || i != host_mem[i].s1()) { + printf("Unexpected result [%02d,%02d] vs [%02d,%02d]\n", i, i, + host_mem[i].s0(), host_mem[i].s1()); + return 1; + } + } + printf("Success!\n"); + return 0; +} diff --git a/sycl/test/on-device/lit.cfg.py b/sycl/test/on-device/lit.cfg.py index e96b00c53a50a..a1af1000bacac 100644 --- a/sycl/test/on-device/lit.cfg.py +++ b/sycl/test/on-device/lit.cfg.py @@ -211,7 +211,7 @@ def getDeviceCount(device_type): if cuda: config.substitutions.append( ('%sycl_triple', "nvptx64-nvidia-cuda-sycldevice" ) ) else: - config.substitutions.append( ('%sycl_triple', "spir64-unknown-linux-sycldevice" ) ) + config.substitutions.append( ('%sycl_triple', "spir64-unknown-unknown-sycldevice" ) ) if "opencl-aot" in config.llvm_enable_projects: lit_config.note("Using opencl-aot version which is built as part of the project")