Skip to content

Commit 8935acf

Browse files
antonwolfyvtavana
andauthored
Work around sub_group load/store issues (#1642)
Co-authored-by: vtavana <[email protected]>
1 parent 9b450f0 commit 8935acf

9 files changed

+50
-10
lines changed

CMakeLists.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ set(CYTHON_FLAGS "-t -w \"${CMAKE_SOURCE_DIR}\"")
8080
find_package(Cython REQUIRED)
8181
find_package(Dpctl REQUIRED)
8282

83-
message(STATUS "Dpctl_INCLUDE_DIRS=" ${Dpctl_INCLUDE_DIRS})
83+
message(STATUS "Dpctl_INCLUDE_DIR=" ${Dpctl_INCLUDE_DIR})
8484
message(STATUS "Dpctl_TENSOR_INCLUDE_DIR=" ${Dpctl_TENSOR_INCLUDE_DIR})
8585

8686
if(WIN32)

dpnp/backend/CMakeLists.txt

+2
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,8 @@ target_compile_definitions(${_trgt} PUBLIC PSTL_USE_PARALLEL_POLICIES=0)
107107
target_compile_definitions(${_trgt} PUBLIC ONEDPL_USE_PREDEFINED_POLICIES=0)
108108

109109
target_include_directories(${_trgt} PUBLIC ${Dpctl_INCLUDE_DIR})
110+
target_include_directories(${_trgt} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR})
111+
110112
target_link_directories(${_trgt} PUBLIC "${Dpctl_INCLUDE_DIR}/..")
111113
target_link_libraries(${_trgt} PUBLIC DPCTLSyclInterface)
112114

dpnp/backend/extensions/lapack/CMakeLists.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ set_target_properties(${python_module_name} PROPERTIES CMAKE_POSITION_INDEPENDEN
5252
target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include)
5353
target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src)
5454

55-
target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS})
55+
target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIR})
5656
target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR})
5757

5858
if (WIN32)

dpnp/backend/extensions/sycl_ext/CMakeLists.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ set_target_properties(${python_module_name} PROPERTIES CMAKE_POSITION_INDEPENDEN
4545
target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include)
4646
target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src)
4747

48-
target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS})
48+
target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIR})
4949
target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR})
5050

5151
if (WIN32)

dpnp/backend/extensions/vm/CMakeLists.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ set_target_properties(${python_module_name} PROPERTIES CMAKE_POSITION_INDEPENDEN
4545
target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include)
4646
target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src)
4747

48-
target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS})
48+
target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIR})
4949
target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR})
5050

5151
if (WIN32)

dpnp/backend/kernels/dpnp_krnl_bitwise.cpp

+10-1
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,12 @@
3232
#include "dpnpc_memory_adapter.hpp"
3333
#include "queue_sycl.hpp"
3434

35+
// dpctl tensor headers
36+
#include "kernels/alignment.hpp"
37+
38+
using dpctl::tensor::kernels::alignment_utils::is_aligned;
39+
using dpctl::tensor::kernels::alignment_utils::required_alignment;
40+
3541
template <typename _KernelNameSpecialization>
3642
class dpnp_invert_c_kernel;
3743

@@ -67,7 +73,10 @@ DPCTLSyclEventRef dpnp_invert_c(DPCTLSyclQueueRef q_ref,
6773
vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) +
6874
sg.get_group_id()[0] * max_sg_size);
6975

70-
if (start + static_cast<size_t>(vec_sz) * max_sg_size < size) {
76+
if (is_aligned<required_alignment>(input_data) &&
77+
is_aligned<required_alignment>(result) &&
78+
(start + static_cast<size_t>(vec_sz) * max_sg_size < size))
79+
{
7180
auto input_multi_ptr = sycl::address_space_cast<
7281
sycl::access::address_space::global_space,
7382
sycl::access::decorated::yes>(&input_data[start]);

dpnp/backend/kernels/dpnp_krnl_elemwise.cpp

+12-2
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,12 @@
3434
#include "dpnpc_memory_adapter.hpp"
3535
#include "queue_sycl.hpp"
3636

37+
// dpctl tensor headers
38+
#include "kernels/alignment.hpp"
39+
40+
using dpctl::tensor::kernels::alignment_utils::is_aligned;
41+
using dpctl::tensor::kernels::alignment_utils::required_alignment;
42+
3743
#define MACRO_1ARG_2TYPES_OP(__name__, __operation1__, __operation2__) \
3844
template <typename _KernelNameSpecialization1, \
3945
typename _KernelNameSpecialization2> \
@@ -1198,8 +1204,12 @@ static void func_map_init_elemwise_1arg_1type(func_map_t &fmap)
11981204
(nd_it.get_group(0) * nd_it.get_local_range(0) + \
11991205
sg.get_group_id()[0] * max_sg_size); \
12001206
\
1201-
if (start + static_cast<size_t>(vec_sz) * max_sg_size < \
1202-
result_size) { \
1207+
if (is_aligned<required_alignment>(input1_data) && \
1208+
is_aligned<required_alignment>(input2_data) && \
1209+
is_aligned<required_alignment>(result) && \
1210+
(start + static_cast<size_t>(vec_sz) * max_sg_size < \
1211+
result_size)) \
1212+
{ \
12031213
auto input1_multi_ptr = sycl::address_space_cast< \
12041214
sycl::access::address_space::global_space, \
12051215
sycl::access::decorated::yes>( \

dpnp/backend/kernels/dpnp_krnl_logic.cpp

+12-2
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,12 @@
3131
#include "dpnpc_memory_adapter.hpp"
3232
#include "queue_sycl.hpp"
3333

34+
// dpctl tensor headers
35+
#include "kernels/alignment.hpp"
36+
37+
using dpctl::tensor::kernels::alignment_utils::is_aligned;
38+
using dpctl::tensor::kernels::alignment_utils::required_alignment;
39+
3440
template <typename _DataType, typename _ResultType>
3541
class dpnp_all_c_kernel;
3642

@@ -610,8 +616,12 @@ DPCTLSyclEventRef (*dpnp_any_ext_c)(DPCTLSyclQueueRef,
610616
vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + \
611617
sg.get_group_id()[0] * max_sg_size); \
612618
\
613-
if (start + static_cast<size_t>(vec_sz) * max_sg_size < \
614-
result_size) { \
619+
if (is_aligned<required_alignment>(input1_data) && \
620+
is_aligned<required_alignment>(input2_data) && \
621+
is_aligned<required_alignment>(result) && \
622+
(start + static_cast<size_t>(vec_sz) * max_sg_size < \
623+
result_size)) \
624+
{ \
615625
auto input1_multi_ptr = sycl::address_space_cast< \
616626
sycl::access::address_space::global_space, \
617627
sycl::access::decorated::yes>(&input1_data[start]); \

dpnp/backend/kernels/dpnp_krnl_mathematical.cpp

+10-1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,12 @@
3535
#include "dpnpc_memory_adapter.hpp"
3636
#include "queue_sycl.hpp"
3737

38+
// dpctl tensor headers
39+
#include "kernels/alignment.hpp"
40+
41+
using dpctl::tensor::kernels::alignment_utils::is_aligned;
42+
using dpctl::tensor::kernels::alignment_utils::required_alignment;
43+
3844
static_assert(__SYCL_COMPILER_VERSION >= __SYCL_COMPILER_VECTOR_ABS_CHANGED,
3945
"SYCL DPC++ compiler does not meet minimum version requirement");
4046

@@ -163,7 +169,10 @@ DPCTLSyclEventRef
163169
vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) +
164170
sg.get_group_id()[0] * max_sg_size);
165171

166-
if (start + static_cast<size_t>(vec_sz) * max_sg_size < size) {
172+
if (is_aligned<required_alignment>(array1) &&
173+
is_aligned<required_alignment>(result) &&
174+
(start + static_cast<size_t>(vec_sz) * max_sg_size < size))
175+
{
167176
auto array_multi_ptr = sycl::address_space_cast<
168177
sycl::access::address_space::global_space,
169178
sycl::access::decorated::yes>(&array1[start]);

0 commit comments

Comments
 (0)