From 9960b60bf8024a7dc3e2560fa215da35f76b291c Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 1 Jun 2023 08:06:03 -0500 Subject: [PATCH 1/7] Improve dpnp.div() implementation: - reuse dpctl.tensor.div() - write pybind11 extension for div() from OneMKL VM --- dpnp/CMakeLists.txt | 1 + dpnp/backend/extensions/vm/CMakeLists.txt | 75 +++++ dpnp/backend/extensions/vm/div.cpp | 307 ++++++++++++++++++++ dpnp/backend/extensions/vm/div.hpp | 57 ++++ dpnp/backend/extensions/vm/types_matrix.hpp | 67 +++++ dpnp/backend/extensions/vm/vm_py.cpp | 71 +++++ dpnp/dpnp_algo/dpnp_algo.pxd | 3 - dpnp/dpnp_algo/dpnp_algo.pyx | 2 +- dpnp/dpnp_algo/dpnp_algo_mathematical.pxi | 9 - dpnp/dpnp_algo/dpnp_elementwise_common.py | 84 ++++++ dpnp/dpnp_iface.py | 27 ++ dpnp/dpnp_iface_mathematical.py | 28 +- tests/test_sycl_queue.py | 5 +- 13 files changed, 708 insertions(+), 28 deletions(-) create mode 100644 dpnp/backend/extensions/vm/CMakeLists.txt create mode 100644 dpnp/backend/extensions/vm/div.cpp create mode 100644 dpnp/backend/extensions/vm/div.hpp create mode 100644 dpnp/backend/extensions/vm/types_matrix.hpp create mode 100644 dpnp/backend/extensions/vm/vm_py.cpp create mode 100644 dpnp/dpnp_algo/dpnp_elementwise_common.py diff --git a/dpnp/CMakeLists.txt b/dpnp/CMakeLists.txt index 54be4eb23b9a..89524ab1c58d 100644 --- a/dpnp/CMakeLists.txt +++ b/dpnp/CMakeLists.txt @@ -48,6 +48,7 @@ endfunction() build_dpnp_cython_ext_with_backend(dparray ${CMAKE_CURRENT_SOURCE_DIR}/dparray.pyx dpnp) add_subdirectory(backend) add_subdirectory(backend/extensions/lapack) +add_subdirectory(backend/extensions/vm) add_subdirectory(dpnp_algo) add_subdirectory(dpnp_utils) diff --git a/dpnp/backend/extensions/vm/CMakeLists.txt b/dpnp/backend/extensions/vm/CMakeLists.txt new file mode 100644 index 000000000000..8f3086ec3a9e --- /dev/null +++ b/dpnp/backend/extensions/vm/CMakeLists.txt @@ -0,0 +1,75 @@ +# ***************************************************************************** +# Copyright (c) 2023, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. +# ***************************************************************************** + + +set(python_module_name _vm_impl) +pybind11_add_module(${python_module_name} MODULE + vm_py.cpp + div.cpp +) + +if (WIN32) + if (${CMAKE_VERSION} VERSION_LESS "3.27") + # this is a work-around for target_link_options inserting option after -link option, cause + # linker to ignore it. + set(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -fsycl-device-code-split=per_kernel") + endif() +endif() + +set_target_properties(${python_module_name} PROPERTIES CMAKE_POSITION_INDEPENDENT_CODE ON) + +target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include) +target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src) + +target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS}) +target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) + +if (WIN32) + target_compile_options(${python_module_name} PRIVATE + /clang:-fno-approx-func + /clang:-fno-finite-math-only + ) +else() + target_compile_options(${python_module_name} PRIVATE + -fno-approx-func + -fno-finite-math-only + ) +endif() + +target_link_options(${python_module_name} PUBLIC -fsycl-device-code-split=per_kernel) +if (UNIX) + # this option is support on Linux only + target_link_options(${python_module_name} PUBLIC -fsycl-link-huge-device-code) +endif() + +if (DPNP_GENERATE_COVERAGE) + target_link_options(${python_module_name} PRIVATE -fprofile-instr-generate -fcoverage-mapping) +endif() + +target_link_libraries(${python_module_name} PUBLIC MKL::MKL_DPCPP) + +install(TARGETS ${python_module_name} + DESTINATION "dpnp/backend/extensions/vm" +) diff --git a/dpnp/backend/extensions/vm/div.cpp b/dpnp/backend/extensions/vm/div.cpp new file mode 100644 index 000000000000..48b64959b88a --- /dev/null +++ b/dpnp/backend/extensions/vm/div.cpp @@ -0,0 +1,307 @@ +//***************************************************************************** +// Copyright (c) 2023, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + + +#include + +// dpctl tensor headers +#include "utils/memory_overlap.hpp" +#include "utils/type_utils.hpp" + +#include "div.hpp" +#include "types_matrix.hpp" + +#include "dpnp_utils.hpp" + + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace vm +{ + +namespace mkl_vm = oneapi::mkl::vm; +namespace py = pybind11; +namespace type_utils = dpctl::tensor::type_utils; + +typedef sycl::event (*div_impl_fn_ptr_t)( + sycl::queue, const std::int64_t, const char*, const char*, char*, const std::vector&); + +static div_impl_fn_ptr_t div_dispatch_vector[dpctl_td_ns::num_types]; + +template +static sycl::event div_impl(sycl::queue exec_q, + const std::int64_t n, + const char* in_a, + const char* in_b, + char* out_y, + const std::vector& depends) +{ + type_utils::validate_type_for_device(exec_q); + + const T* a = reinterpret_cast(in_a); + const T* b = reinterpret_cast(in_b); + T* y = reinterpret_cast(out_y); + + return mkl_vm::div(exec_q, + n, // number of elements to be calculated + a, // pointer `a` containing 1st input vector of size n + b, // pointer `b` containing 2nd input vector of size n + y, // pointer `y` to the output vector of size n + depends); +} + +std::pair div(sycl::queue exec_q, + dpctl::tensor::usm_ndarray src1, + dpctl::tensor::usm_ndarray src2, + dpctl::tensor::usm_ndarray dst, // dst = op(src1, src2), elementwise + const std::vector& depends) +{ + // check type_nums + int src1_typenum = src1.get_typenum(); + int src2_typenum = src2.get_typenum(); + int dst_typenum = dst.get_typenum(); + + auto array_types = dpctl_td_ns::usm_ndarray_types(); + int src1_typeid = array_types.typenum_to_lookup_id(src1_typenum); + int src2_typeid = array_types.typenum_to_lookup_id(src2_typenum); + int dst_typeid = array_types.typenum_to_lookup_id(dst_typenum); + + if (src1_typeid != src2_typeid || src2_typeid != dst_typeid) + { + throw py::value_error("Either any of input arrays or output array have different types"); + } + + // check that queues are compatible + if (!dpctl::utils::queues_are_compatible(exec_q, {src1, src2, dst})) + { + throw py::value_error("Execution queue is not compatible with allocation queues"); + } + + // check shapes, broadcasting is assumed done by caller + // check that dimensions are the same + int dst_nd = dst.get_ndim(); + if (dst_nd != src1.get_ndim() || dst_nd != src2.get_ndim()) + { + throw py::value_error("Array dimensions are not the same."); + } + + // check that shapes are the same + const py::ssize_t* src1_shape = src1.get_shape_raw(); + const py::ssize_t* src2_shape = src2.get_shape_raw(); + const py::ssize_t* dst_shape = dst.get_shape_raw(); + bool shapes_equal(true); + size_t src_nelems(1); + + for (int i = 0; i < dst_nd; ++i) + { + src_nelems *= static_cast(src1_shape[i]); + shapes_equal = shapes_equal && (src1_shape[i] == dst_shape[i] && src2_shape[i] == dst_shape[i]); + } + if (!shapes_equal) + { + throw py::value_error("Array shapes are not the same."); + } + + // if nelems is zero, return + if (src_nelems == 0) + { + return std::make_pair(sycl::event(), sycl::event()); + } + + // ensure that output is ample enough to accomodate all elements + auto dst_offsets = dst.get_minmax_offsets(); + // destination must be ample enough to accomodate all elements + { + size_t range = static_cast(dst_offsets.second - dst_offsets.first); + if (range + 1 < src_nelems) + { + throw py::value_error( + "Destination array can not accomodate all the " + "elements of source array."); + } + } + + // check memory overlap + auto const& overlap = dpctl::tensor::overlap::MemoryOverlap(); + if (overlap(src1, dst) || overlap(src2, dst)) + { + throw py::value_error("Arrays index overlapping segments of memory"); + } + + const char* src1_data = src1.get_data(); + const char* src2_data = src2.get_data(); + char* dst_data = dst.get_data(); + + // handle contiguous inputs + bool is_src1_c_contig = src1.is_c_contiguous(); + bool is_src2_c_contig = src2.is_c_contiguous(); + bool is_dst_c_contig = dst.is_c_contiguous(); + + bool all_c_contig = (is_src1_c_contig && is_src2_c_contig && is_dst_c_contig); + if (!all_c_contig) + { + throw py::value_error("Input and outpur arrays must be C-contiguous"); + } + + auto div_fn = div_dispatch_vector[dst_typeid]; + if (div_fn == nullptr) + { + throw py::value_error("No div implementation defined"); + } + sycl::event sum_ev = div_fn(exec_q, src_nelems, src1_data, src2_data, dst_data, depends); + + sycl::event ht_ev = dpctl::utils::keep_args_alive(exec_q, {src1, src2, dst}, {sum_ev}); + return std::make_pair(ht_ev, sum_ev); +} + +bool can_call_div(sycl::queue exec_q, + dpctl::tensor::usm_ndarray src1, + dpctl::tensor::usm_ndarray src2, + dpctl::tensor::usm_ndarray dst) +{ + // check type_nums + int src1_typenum = src1.get_typenum(); + int src2_typenum = src2.get_typenum(); + int dst_typenum = dst.get_typenum(); + + auto array_types = dpctl_td_ns::usm_ndarray_types(); + int src1_typeid = array_types.typenum_to_lookup_id(src1_typenum); + int src2_typeid = array_types.typenum_to_lookup_id(src2_typenum); + int dst_typeid = array_types.typenum_to_lookup_id(dst_typenum); + + // types must be the same + if (src1_typeid != src2_typeid || src2_typeid != dst_typeid) + { + return false; + } + + // OneMKL VM functions perform a copy on host if no double type support + if (!exec_q.get_device().has(sycl::aspect::fp64)) + { + return false; + } + + // check that queues are compatible + if (!dpctl::utils::queues_are_compatible(exec_q, {src1, src2, dst})) + { + return false; + } + + // dimensions must be the same + int dst_nd = dst.get_ndim(); + if (dst_nd != src1.get_ndim() || dst_nd != src2.get_ndim()) + { + return false; + } + + // shapes must be the same + const py::ssize_t* src1_shape = src1.get_shape_raw(); + const py::ssize_t* src2_shape = src2.get_shape_raw(); + const py::ssize_t* dst_shape = dst.get_shape_raw(); + bool shapes_equal(true); + size_t src_nelems(1); + + for (int i = 0; i < dst_nd; ++i) + { + src_nelems *= static_cast(src1_shape[i]); + shapes_equal = shapes_equal && (src1_shape[i] == dst_shape[i] && src2_shape[i] == dst_shape[i]); + } + if (!shapes_equal) + { + return false; + } + + // if nelems is zero, return false + if (src_nelems == 0) + { + return false; + } + + // ensure that output is ample enough to accomodate all elements + auto dst_offsets = dst.get_minmax_offsets(); + // destination must be ample enough to accomodate all elements + { + size_t range = static_cast(dst_offsets.second - dst_offsets.first); + if (range + 1 < src_nelems) + { + return false; + } + } + + // check memory overlap + auto const& overlap = dpctl::tensor::overlap::MemoryOverlap(); + if (overlap(src1, dst) || overlap(src2, dst)) + { + return false; + } + + // suppport only contiguous inputs + bool is_src1_c_contig = src1.is_c_contiguous(); + bool is_src2_c_contig = src2.is_c_contiguous(); + bool is_dst_c_contig = dst.is_c_contiguous(); + + bool all_c_contig = (is_src1_c_contig && is_src2_c_contig && is_dst_c_contig); + if (!all_c_contig) + { + return false; + } + + // MKL function is not defined for the type + if (div_dispatch_vector[src1_typeid] == nullptr) + { + return false; + } + return true; +} + +template +struct DivContigFactory +{ + fnT get() + { + if constexpr (types::DivTypePairSupportFactory::is_defined) + { + return div_impl; + } + else + { + return nullptr; + } + } +}; + +void init_div_dispatch_vector(void) +{ + dpctl_td_ns::DispatchVectorBuilder contig; + contig.populate_dispatch_vector(div_dispatch_vector); +} +} // namespace vm +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/vm/div.hpp b/dpnp/backend/extensions/vm/div.hpp new file mode 100644 index 000000000000..d530a156aa4a --- /dev/null +++ b/dpnp/backend/extensions/vm/div.hpp @@ -0,0 +1,57 @@ +//***************************************************************************** +// Copyright (c) 2023, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include +#include + +#include +#include + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace vm +{ + extern std::pair div(sycl::queue exec_q, + dpctl::tensor::usm_ndarray src1, + dpctl::tensor::usm_ndarray src2, + dpctl::tensor::usm_ndarray dst, + const std::vector& depends); + + extern bool can_call_div(sycl::queue exec_q, + dpctl::tensor::usm_ndarray src1, + dpctl::tensor::usm_ndarray src2, + dpctl::tensor::usm_ndarray dst); + + extern void init_div_dispatch_vector(void); +} // namespace vm +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/vm/types_matrix.hpp b/dpnp/backend/extensions/vm/types_matrix.hpp new file mode 100644 index 000000000000..af7c9c47e731 --- /dev/null +++ b/dpnp/backend/extensions/vm/types_matrix.hpp @@ -0,0 +1,67 @@ +//***************************************************************************** +// Copyright (c) 2023, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include + +// dpctl tensor headers +#include "utils/type_dispatch.hpp" + +// dpctl namespace for operations with types +namespace dpctl_td_ns = dpctl::tensor::type_dispatch; + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace vm +{ +namespace types +{ + /** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::div function. + * + * @tparam T Type of input vectors `a` and `b` and of result vector `y`. + */ + template + struct DivTypePairSupportFactory + { + static constexpr bool is_defined = std::disjunction< + dpctl_td_ns::TypePairDefinedEntry, T, std::complex>, + dpctl_td_ns::TypePairDefinedEntry, T, std::complex>, + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + // fall-through + dpctl_td_ns::NotDefinedEntry>::is_defined; + }; +} // namespace types +} // namespace vm +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/vm/vm_py.cpp b/dpnp/backend/extensions/vm/vm_py.cpp new file mode 100644 index 000000000000..9a040b1f59b0 --- /dev/null +++ b/dpnp/backend/extensions/vm/vm_py.cpp @@ -0,0 +1,71 @@ +//***************************************************************************** +// Copyright (c) 2023, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +// This file defines functions of dpnp.backend._lapack_impl extensions +// +//***************************************************************************** + +#include +#include + +#include "div.hpp" + +namespace vm_ext = dpnp::backend::ext::vm; +namespace py = pybind11; + +// populate dispatch vectors +void init_dispatch_vectors(void) +{ + vm_ext::init_div_dispatch_vector(); +} + +// populate dispatch tables +void init_dispatch_tables(void) +{ +} + +PYBIND11_MODULE(_vm_impl, m) +{ + init_dispatch_vectors(); + init_dispatch_tables(); + + m.def("_div", + &vm_ext::div, + "Call `div` from OneMKL VM library to performs element by element division " + "of vector `src1` by vector `src2` to resulting vector `dst`", + py::arg("sycl_queue"), + py::arg("src1"), + py::arg("src2"), + py::arg("dst"), + py::arg("depends") = py::list()); + + m.def("_can_call_div", + &vm_ext::can_call_div, + "Check input arrays to answer if `div` function from OneMKL VM library can be used", + py::arg("sycl_queue"), + py::arg("src1"), + py::arg("src2"), + py::arg("dst")); +} diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd index 56195613a338..0120aa6b453a 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pxd +++ b/dpnp/dpnp_algo/dpnp_algo.pxd @@ -118,7 +118,6 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_DIAGONAL DPNP_FN_DIAGONAL_EXT DPNP_FN_DIVIDE - DPNP_FN_DIVIDE_EXT DPNP_FN_DOT DPNP_FN_DOT_EXT DPNP_FN_EDIFF1D @@ -531,8 +530,6 @@ cpdef dpnp_descriptor dpnp_add(dpnp_descriptor x1_obj, dpnp_descriptor x2_obj, o dpnp_descriptor out=*, object where=*) cpdef dpnp_descriptor dpnp_arctan2(dpnp_descriptor x1_obj, dpnp_descriptor x2_obj, object dtype=*, dpnp_descriptor out=*, object where=*) -cpdef dpnp_descriptor dpnp_divide(dpnp_descriptor x1_obj, dpnp_descriptor x2_obj, object dtype=*, - dpnp_descriptor out=*, object where=*) cpdef dpnp_descriptor dpnp_hypot(dpnp_descriptor x1_obj, dpnp_descriptor x2_obj, object dtype=*, dpnp_descriptor out=*, object where=*) cpdef dpnp_descriptor dpnp_maximum(dpnp_descriptor x1_obj, dpnp_descriptor x2_obj, object dtype=*, diff --git a/dpnp/dpnp_algo/dpnp_algo.pyx b/dpnp/dpnp_algo/dpnp_algo.pyx index 24abd1b4b9e4..8b6d4be73e94 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pyx +++ b/dpnp/dpnp_algo/dpnp_algo.pyx @@ -499,7 +499,7 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name, # get FPTR function and return type cdef fptr_2in_1out_strides_t func = NULL cdef DPNPFuncType return_type = DPNP_FT_NONE - if fptr_name != DPNP_FN_DIVIDE_EXT or result_sycl_device.has_aspect_fp64: + if result_sycl_device.has_aspect_fp64: return_type = kernel_data.return_type func = < fptr_2in_1out_strides_t > kernel_data.ptr else: diff --git a/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi b/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi index b5534c7c1a8e..3a002fdd4ba7 100644 --- a/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi +++ b/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi @@ -47,7 +47,6 @@ __all__ += [ "dpnp_cumprod", "dpnp_cumsum", "dpnp_diff", - "dpnp_divide", "dpnp_ediff1d", "dpnp_fabs", "dpnp_floor", @@ -249,14 +248,6 @@ cpdef utils.dpnp_descriptor dpnp_diff(utils.dpnp_descriptor x1, int n): return dpnp_diff(res, n - 1) -cpdef utils.dpnp_descriptor dpnp_divide(utils.dpnp_descriptor x1_obj, - utils.dpnp_descriptor x2_obj, - object dtype=None, - utils.dpnp_descriptor out=None, - object where=True): - return call_fptr_2in_1out_strides(DPNP_FN_DIVIDE_EXT, x1_obj, x2_obj, dtype, out, where) - - cpdef utils.dpnp_descriptor dpnp_ediff1d(utils.dpnp_descriptor x1): if x1.size <= 1: diff --git a/dpnp/dpnp_algo/dpnp_elementwise_common.py b/dpnp/dpnp_algo/dpnp_elementwise_common.py new file mode 100644 index 000000000000..15481941f85f --- /dev/null +++ b/dpnp/dpnp_algo/dpnp_elementwise_common.py @@ -0,0 +1,84 @@ +# cython: language_level=3 +# distutils: language = c++ +# -*- coding: utf-8 -*- +# ***************************************************************************** +# Copyright (c) 2023, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. +# ***************************************************************************** + + +import dpnp +from dpnp.dpnp_array import dpnp_array +import dpnp.backend.extensions.vm._vm_impl as vmi + +from dpctl.tensor._elementwise_common import ( + BinaryElementwiseFunc +) +import dpctl.tensor._tensor_impl as ti + + +__all__ = [ + "dpnp_divide" +] + + +_divide_docstring_ = """ +divide(x1, x2, out=None, order='K') + +Calculates the ratio for each element `x1_i` of the input array `x1` with +the respective element `x2_i` of the input array `x2`. + +Args: + x1 (dpnp.ndarray): + First input array, expected to have numeric data type. + x2 (dpnp.ndarray): + Second input array, also expected to have numeric data type. +Returns: + dpnp.ndarray: + an array containing the result of element-wise division. The data type + of the returned array is determined by the Type Promotion Rules. +""" + +def dpnp_divide(x1, x2, out=None, order='K'): + """ + Invokes div() function from pybind11 extension of OneMKL VM if possible. + Otherwise fully relies on dpctl.tensor implementation for divide() function. + + """ + + def _call_divide(src1, src2, dst, sycl_queue, depends=[]): + """A callback to register in BinaryElementwiseFunc class of dpctl.tensor""" + + if vmi._can_call_div(sycl_queue, src1, src2, dst): + # call pybind11 extension for div() function from OneMKL VM + return vmi._div(sycl_queue, src1, src2, dst, depends) + return ti._divide(src1, src2, dst, sycl_queue, depends) + + # dpctl.tensor only works with usm_ndarray or scalar + x1_usm_or_scalar = dpnp.get_usm_ndarray_or_scalar(x1) + x2_usm_or_scalar = dpnp.get_usm_ndarray_or_scalar(x2) + out_usm = None if out is None else dpnp.get_usm_ndarray(out) + + func = BinaryElementwiseFunc("divide", ti._divide_result_type, _call_divide, _divide_docstring_) + res_usm = func(x1_usm_or_scalar, x2_usm_or_scalar, out=out_usm, order=order) + return dpnp_array._create_from_usm_ndarray(res_usm) diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py index ce3c540539d6..93629440e7d2 100644 --- a/dpnp/dpnp_iface.py +++ b/dpnp/dpnp_iface.py @@ -69,6 +69,7 @@ "get_include", "get_normalized_queue_device", "get_usm_ndarray", + "get_usm_ndarray_or_scalar", "is_supported_array_type" ] @@ -404,6 +405,32 @@ def get_usm_ndarray(a): raise TypeError("An array must be any of supported type, but got {}".format(type(a))) +def get_usm_ndarray_or_scalar(a): + """ + Return scalar or :class:`dpctl.tensor.usm_ndarray` from input object `a`. + + Parameters + ---------- + a : {scalar, dpnp_array, usm_ndarray} + Input of any supported type: scalar, :class:`dpnp.ndarray` + or :class:`dpctl.tensor.usm_ndarray`. + + Returns + ------- + out : scalar, usm_ndarray + A scalar if the input `a` is scalar. + A dpctl USM ndarray if the input `a` is array. + + Raises + ------ + TypeError + If input parameter `a` is of unsupported object type. + + """ + + return a if isscalar(a) else get_usm_ndarray(a) + + def is_supported_array_type(a): """ Return ``True`` if an array of either type :class:`dpnp.ndarray` diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index 98dcc71d31af..a7f5a7e8a0cb 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -40,8 +40,11 @@ """ -from dpnp.dpnp_algo import * -from dpnp.dpnp_utils import * +from .dpnp_algo import * +from .dpnp_algo.dpnp_elementwise_common import ( + dpnp_divide +) +from .dpnp_utils import * import dpnp @@ -586,6 +589,7 @@ def divide(x1, out=None, *, where=True, + order='K', dtype=None, subok=True, **kwargs): @@ -617,28 +621,24 @@ def divide(x1, """ - if out is not None: - pass - elif where is not True: + if where is not True: pass elif dtype is not None: pass elif subok is not True: pass + elif kwargs: + pass elif dpnp.isscalar(x1) and dpnp.isscalar(x2): # at least either x1 or x2 has to be an array pass else: - # get USM type and queue to copy scalar from the host memory into a USM allocation - usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, - alloc_usm_type=usm_type, alloc_queue=queue) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, - alloc_usm_type=usm_type, alloc_queue=queue) - if x1_desc and x2_desc: - return dpnp_divide(x1_desc, x2_desc, dtype=dtype, out=out, where=where).get_pyobj() + if order in "afkcAFKC": + order = order.upper() + else: + raise ValueError("order must be one of 'C', 'F', 'A', or 'K' (got '{}')".format(order)) + return dpnp_divide(x1, x2, out=out, order=order) return call_origin(numpy.divide, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs) diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index e5e53646e1a1..3182003a90be 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -6,6 +6,9 @@ import dpnp import dpctl +from dpctl.utils import ( + ExecutionPlacementError +) import numpy from numpy.testing import ( @@ -431,7 +434,7 @@ def test_broadcasting(func, data1, data2, device): def test_2in_1out_diff_queue_but_equal_context(func, device): x1 = dpnp.arange(10) x2 = dpnp.arange(10, sycl_queue=dpctl.SyclQueue(device))[::-1] - with assert_raises(ValueError): + with assert_raises((ValueError, ExecutionPlacementError)): getattr(dpnp, func)(x1, x2) From 9cd4eac735b0bc921317811a25a392fda82326e5 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Tue, 6 Jun 2023 09:51:53 -0500 Subject: [PATCH 2/7] Add support of order=None --- dpnp/dpnp_iface_mathematical.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index a7f5a7e8a0cb..9e877703b409 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -635,6 +635,8 @@ def divide(x1, else: if order in "afkcAFKC": order = order.upper() + elif order is None: + order = 'K' else: raise ValueError("order must be one of 'C', 'F', 'A', or 'K' (got '{}')".format(order)) From 08b10cf1203b5cefb0dfe2ef13bac008468e9648 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 7 Jun 2023 11:58:52 -0500 Subject: [PATCH 3/7] add docstrings for out and order --- dpnp/dpnp_algo/dpnp_elementwise_common.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dpnp/dpnp_algo/dpnp_elementwise_common.py b/dpnp/dpnp_algo/dpnp_elementwise_common.py index 15481941f85f..9c2383e57a0b 100644 --- a/dpnp/dpnp_algo/dpnp_elementwise_common.py +++ b/dpnp/dpnp_algo/dpnp_elementwise_common.py @@ -53,6 +53,12 @@ First input array, expected to have numeric data type. x2 (dpnp.ndarray): Second input array, also expected to have numeric data type. + out ({None, dpnp.ndarray}, optional): + Output array to populate. + Array have the correct shape and the expected data type. + order ("C","F","A","K", None, optional): + Memory layout of the newly output array, if parameter `out` is `None`. + Default: "K". Returns: dpnp.ndarray: an array containing the result of element-wise division. The data type From ee09d7b8740c3052c10d8672fc0c029229a13b0d Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 9 Jun 2023 09:15:40 -0500 Subject: [PATCH 4/7] Fix hanging in tril --- dpnp/backend/extensions/vm/div.cpp | 5 +++++ tests/test_mathematical.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/dpnp/backend/extensions/vm/div.cpp b/dpnp/backend/extensions/vm/div.cpp index 48b64959b88a..6e98e09f3475 100644 --- a/dpnp/backend/extensions/vm/div.cpp +++ b/dpnp/backend/extensions/vm/div.cpp @@ -219,6 +219,11 @@ bool can_call_div(sycl::queue exec_q, { return false; } + else if (dst_nd == 0) + { + // don't call OneMKL for 0d arrays + return false; + } // shapes must be the same const py::ssize_t* src1_shape = src1.get_shape_raw(); diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py index 5f0d73b23b7b..6224eb57ab81 100644 --- a/tests/test_mathematical.py +++ b/tests/test_mathematical.py @@ -284,7 +284,7 @@ def test_divide_scalar(shape, dtype): result = 0.5 / dpnp_a / 1.7 expected = 0.5 / np_a / 1.7 - assert_allclose(result, expected) + assert_allclose(result, expected, rtol=1e-6) @pytest.mark.parametrize("shape", From 05c7d776011d2a1e1f4ae53a314f7a6635450eba Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 9 Jun 2023 12:10:24 -0500 Subject: [PATCH 5/7] Remove w/a in divide tests for type aligning with numpy --- tests/third_party/cupy/math_tests/test_arithmetic.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tests/third_party/cupy/math_tests/test_arithmetic.py b/tests/third_party/cupy/math_tests/test_arithmetic.py index 39dc3e10f721..71f33429c704 100644 --- a/tests/third_party/cupy/math_tests/test_arithmetic.py +++ b/tests/third_party/cupy/math_tests/test_arithmetic.py @@ -168,13 +168,6 @@ def check_binary(self, xp): y = y.astype(dtype1) elif is_array_arg2 and not is_array_arg1: y = y.astype(dtype2) - elif self.name in ('divide', 'true_divide'): - # If one input is an array of float32 and another - an integer or floating scalar, - # NumPy will return an output array of float32, while DPNP will return the array of float64, - # since NumPy would use the same float64 type when instead of scalar here is array of integer of floating type. - if not (is_array_arg1 and is_array_arg2): - if (is_array_arg1 and arg1.dtype == numpy.float32) ^ (is_array_arg2 and arg2.dtype == numpy.float32): - y = y.astype(numpy.float32) # NumPy returns different values (nan/inf) on division by zero # depending on the architecture. From 864a620536989d1620a9ca9bc78b84752298a7a5 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 9 Jun 2023 14:46:37 -0500 Subject: [PATCH 6/7] only events from host task has to be passed in keep_args_alive() --- .github/workflows/conda-package.yml | 8 +++- .github/workflows/generate_coverage.yaml | 1 + dpnp/backend/extensions/vm/CMakeLists.txt | 13 ++++++ dpnp/backend/extensions/vm/div.cpp | 56 ++++++++++++++++++++--- 4 files changed, 69 insertions(+), 9 deletions(-) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index e42adbdc913d..5645191b7c6e 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -194,8 +194,10 @@ jobs: # TODO: run the whole scope once the issues on CPU are resolved - name: Run tests run: | - python -m pytest -q -ra --disable-warnings -vv ${{ env.TEST_SCOPE }} + python -m pytest -q -ra --disable-warnings -vv -s ${{ env.TEST_SCOPE }} working-directory: ${{ env.tests-path }} + env: + SYCL_QUEUE_THREAD_POOL_SIZE: 16 test_windows: name: Test ['windows-latest', python='${{ matrix.python }}'] @@ -331,8 +333,10 @@ jobs: # TODO: run the whole scope once the issues on CPU are resolved - name: Run tests run: | - python -m pytest -q -ra --disable-warnings -vv ${{ env.TEST_SCOPE }} + python -m pytest -q -ra --disable-warnings -vv -s ${{ env.TEST_SCOPE }} working-directory: ${{ env.tests-path }} + env: + SYCL_QUEUE_THREAD_POOL_SIZE: 16 upload: name: Upload ['${{ matrix.os }}', python='${{ matrix.python }}'] diff --git a/.github/workflows/generate_coverage.yaml b/.github/workflows/generate_coverage.yaml index bd3c8c366f0e..75feca67e560 100644 --- a/.github/workflows/generate_coverage.yaml +++ b/.github/workflows/generate_coverage.yaml @@ -66,6 +66,7 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} COVERALLS_PARALLEL: true + SYCL_QUEUE_THREAD_POOL_SIZE: 6 coveralls: name: Indicate completion to coveralls.io diff --git a/dpnp/backend/extensions/vm/CMakeLists.txt b/dpnp/backend/extensions/vm/CMakeLists.txt index 8f3086ec3a9e..07a4ffae8aba 100644 --- a/dpnp/backend/extensions/vm/CMakeLists.txt +++ b/dpnp/backend/extensions/vm/CMakeLists.txt @@ -55,7 +55,9 @@ else() target_compile_options(${python_module_name} PRIVATE -fno-approx-func -fno-finite-math-only + -no-ipo ) + target_link_options(${python_module_name} PRIVATE -no-ipo) endif() target_link_options(${python_module_name} PUBLIC -fsycl-device-code-split=per_kernel) @@ -70,6 +72,17 @@ endif() target_link_libraries(${python_module_name} PUBLIC MKL::MKL_DPCPP) +target_link_libraries(${python_module_name} PUBLIC oneDPL) + +if (UNIX) + # needed for STL headers with GCC < 11 + target_compile_definitions(${python_module_name} PUBLIC _GLIBCXX_USE_TBB_PAR_BACKEND=0) +endif() + +target_compile_definitions(${python_module_name} PUBLIC PSTL_USE_PARALLEL_POLICIES=0) +# work-around for Windows at exit crash with predefined policies +target_compile_definitions(${python_module_name} PUBLIC ONEDPL_USE_PREDEFINED_POLICIES=0) + install(TARGETS ${python_module_name} DESTINATION "dpnp/backend/extensions/vm" ) diff --git a/dpnp/backend/extensions/vm/div.cpp b/dpnp/backend/extensions/vm/div.cpp index 6e98e09f3475..8a6751a45a44 100644 --- a/dpnp/backend/extensions/vm/div.cpp +++ b/dpnp/backend/extensions/vm/div.cpp @@ -64,16 +64,46 @@ static sycl::event div_impl(sycl::queue exec_q, { type_utils::validate_type_for_device(exec_q); - const T* a = reinterpret_cast(in_a); - const T* b = reinterpret_cast(in_b); - T* y = reinterpret_cast(out_y); + std::cerr << "enter div_impl" << std::endl; - return mkl_vm::div(exec_q, + const T* _a = reinterpret_cast(in_a); + const T* _b = reinterpret_cast(in_b); + T* _y = reinterpret_cast(out_y); + + std::cerr << "casting is done" << std::endl; + + T* a = sycl::malloc_device(n, exec_q); + T* b = sycl::malloc_device(n, exec_q); + T* y = sycl::malloc_device(n, exec_q); + + std::cerr << "malloc is done" << std::endl; + + exec_q.copy(_a, a, n).wait(); + exec_q.copy(_b, b, n).wait(); + exec_q.copy(_y, y, n).wait(); + + std::cerr << "copy is done" << std::endl; + + sycl::event ev = mkl_vm::div(exec_q, n, // number of elements to be calculated a, // pointer `a` containing 1st input vector of size n b, // pointer `b` containing 2nd input vector of size n y, // pointer `y` to the output vector of size n depends); + ev.wait(); + + std::cerr << "div is done" << std::endl; + + exec_q.copy(y, _y, n).wait(); + + std::cerr << "copy is done" << std::endl; + + sycl::free(a, exec_q); + sycl::free(b, exec_q); + sycl::free(y, exec_q); + + std::cerr << "leaving div_impl" << std::endl; + return sycl::event(); } std::pair div(sycl::queue exec_q, @@ -175,9 +205,21 @@ std::pair div(sycl::queue exec_q, throw py::value_error("No div implementation defined"); } sycl::event sum_ev = div_fn(exec_q, src_nelems, src1_data, src2_data, dst_data, depends); - - sycl::event ht_ev = dpctl::utils::keep_args_alive(exec_q, {src1, src2, dst}, {sum_ev}); - return std::make_pair(ht_ev, sum_ev); + // sum_ev.wait(); + + // int* dummy = sycl::malloc_device(1, exec_q); + // sycl::event cleanup_ev = exec_q.submit([&](sycl::handler& cgh) { + // // cgh.depends_on(sum_ev); + // auto ctx = exec_q.get_context(); + // cgh.host_task([dummy, ctx]() { + // // dummy host task to pass into keep_args_alive + // sycl::free(dummy, ctx); + // }); + // }); + + // sycl::event ht_ev = dpctl::utils::keep_args_alive(exec_q, {src1, src2, dst}, {sum_ev}); + // return std::make_pair(ht_ev, sum_ev); + return std::make_pair(sycl::event(), sycl::event()); } bool can_call_div(sycl::queue exec_q, From 1a07356e8182b646128fb8b0bd6004870af19956 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Mon, 12 Jun 2023 08:24:36 -0500 Subject: [PATCH 7/7] exclude call of div() with MKL 2023.1.0 --- .github/workflows/conda-package.yml | 8 +-- dpnp/backend/extensions/vm/CMakeLists.txt | 13 ----- dpnp/backend/extensions/vm/div.cpp | 66 +++++++---------------- 3 files changed, 22 insertions(+), 65 deletions(-) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 5645191b7c6e..fbfe66ff17b9 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -194,10 +194,10 @@ jobs: # TODO: run the whole scope once the issues on CPU are resolved - name: Run tests run: | - python -m pytest -q -ra --disable-warnings -vv -s ${{ env.TEST_SCOPE }} + python -m pytest -q -ra --disable-warnings -vv ${{ env.TEST_SCOPE }} working-directory: ${{ env.tests-path }} env: - SYCL_QUEUE_THREAD_POOL_SIZE: 16 + SYCL_QUEUE_THREAD_POOL_SIZE: 6 test_windows: name: Test ['windows-latest', python='${{ matrix.python }}'] @@ -333,10 +333,10 @@ jobs: # TODO: run the whole scope once the issues on CPU are resolved - name: Run tests run: | - python -m pytest -q -ra --disable-warnings -vv -s ${{ env.TEST_SCOPE }} + python -m pytest -q -ra --disable-warnings -vv ${{ env.TEST_SCOPE }} working-directory: ${{ env.tests-path }} env: - SYCL_QUEUE_THREAD_POOL_SIZE: 16 + SYCL_QUEUE_THREAD_POOL_SIZE: 6 upload: name: Upload ['${{ matrix.os }}', python='${{ matrix.python }}'] diff --git a/dpnp/backend/extensions/vm/CMakeLists.txt b/dpnp/backend/extensions/vm/CMakeLists.txt index 07a4ffae8aba..8f3086ec3a9e 100644 --- a/dpnp/backend/extensions/vm/CMakeLists.txt +++ b/dpnp/backend/extensions/vm/CMakeLists.txt @@ -55,9 +55,7 @@ else() target_compile_options(${python_module_name} PRIVATE -fno-approx-func -fno-finite-math-only - -no-ipo ) - target_link_options(${python_module_name} PRIVATE -no-ipo) endif() target_link_options(${python_module_name} PUBLIC -fsycl-device-code-split=per_kernel) @@ -72,17 +70,6 @@ endif() target_link_libraries(${python_module_name} PUBLIC MKL::MKL_DPCPP) -target_link_libraries(${python_module_name} PUBLIC oneDPL) - -if (UNIX) - # needed for STL headers with GCC < 11 - target_compile_definitions(${python_module_name} PUBLIC _GLIBCXX_USE_TBB_PAR_BACKEND=0) -endif() - -target_compile_definitions(${python_module_name} PUBLIC PSTL_USE_PARALLEL_POLICIES=0) -# work-around for Windows at exit crash with predefined policies -target_compile_definitions(${python_module_name} PUBLIC ONEDPL_USE_PREDEFINED_POLICIES=0) - install(TARGETS ${python_module_name} DESTINATION "dpnp/backend/extensions/vm" ) diff --git a/dpnp/backend/extensions/vm/div.cpp b/dpnp/backend/extensions/vm/div.cpp index 8a6751a45a44..28fbe1cdf3cc 100644 --- a/dpnp/backend/extensions/vm/div.cpp +++ b/dpnp/backend/extensions/vm/div.cpp @@ -64,46 +64,16 @@ static sycl::event div_impl(sycl::queue exec_q, { type_utils::validate_type_for_device(exec_q); - std::cerr << "enter div_impl" << std::endl; + const T* a = reinterpret_cast(in_a); + const T* b = reinterpret_cast(in_b); + T* y = reinterpret_cast(out_y); - const T* _a = reinterpret_cast(in_a); - const T* _b = reinterpret_cast(in_b); - T* _y = reinterpret_cast(out_y); - - std::cerr << "casting is done" << std::endl; - - T* a = sycl::malloc_device(n, exec_q); - T* b = sycl::malloc_device(n, exec_q); - T* y = sycl::malloc_device(n, exec_q); - - std::cerr << "malloc is done" << std::endl; - - exec_q.copy(_a, a, n).wait(); - exec_q.copy(_b, b, n).wait(); - exec_q.copy(_y, y, n).wait(); - - std::cerr << "copy is done" << std::endl; - - sycl::event ev = mkl_vm::div(exec_q, + return mkl_vm::div(exec_q, n, // number of elements to be calculated a, // pointer `a` containing 1st input vector of size n b, // pointer `b` containing 2nd input vector of size n y, // pointer `y` to the output vector of size n depends); - ev.wait(); - - std::cerr << "div is done" << std::endl; - - exec_q.copy(y, _y, n).wait(); - - std::cerr << "copy is done" << std::endl; - - sycl::free(a, exec_q); - sycl::free(b, exec_q); - sycl::free(y, exec_q); - - std::cerr << "leaving div_impl" << std::endl; - return sycl::event(); } std::pair div(sycl::queue exec_q, @@ -205,20 +175,9 @@ std::pair div(sycl::queue exec_q, throw py::value_error("No div implementation defined"); } sycl::event sum_ev = div_fn(exec_q, src_nelems, src1_data, src2_data, dst_data, depends); - // sum_ev.wait(); - - // int* dummy = sycl::malloc_device(1, exec_q); - // sycl::event cleanup_ev = exec_q.submit([&](sycl::handler& cgh) { - // // cgh.depends_on(sum_ev); - // auto ctx = exec_q.get_context(); - // cgh.host_task([dummy, ctx]() { - // // dummy host task to pass into keep_args_alive - // sycl::free(dummy, ctx); - // }); - // }); - - // sycl::event ht_ev = dpctl::utils::keep_args_alive(exec_q, {src1, src2, dst}, {sum_ev}); - // return std::make_pair(ht_ev, sum_ev); + + sycl::event ht_ev = dpctl::utils::keep_args_alive(exec_q, {src1, src2, dst}, {sum_ev}); + return std::make_pair(ht_ev, sum_ev); return std::make_pair(sycl::event(), sycl::event()); } @@ -227,6 +186,7 @@ bool can_call_div(sycl::queue exec_q, dpctl::tensor::usm_ndarray src2, dpctl::tensor::usm_ndarray dst) { +#if INTEL_MKL_VERSION >= 20230002 // check type_nums int src1_typenum = src1.get_typenum(); int src2_typenum = src2.get_typenum(); @@ -325,6 +285,16 @@ bool can_call_div(sycl::queue exec_q, return false; } return true; +#else + // In OneMKL 2023.1.0 the call of oneapi::mkl::vm::div() is going to dead lock + // inside ~usm_wrapper_to_host()->{...; q_->wait_and_throw(); ...} + + (void)exec_q; + (void)src1; + (void)src2; + (void)dst; + return false; +#endif // INTEL_MKL_VERSION >= 20230002 } template