diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index e42adbdc913d..fbfe66ff17b9 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -196,6 +196,8 @@ jobs: run: | python -m pytest -q -ra --disable-warnings -vv ${{ env.TEST_SCOPE }} working-directory: ${{ env.tests-path }} + env: + SYCL_QUEUE_THREAD_POOL_SIZE: 6 test_windows: name: Test ['windows-latest', python='${{ matrix.python }}'] @@ -333,6 +335,8 @@ jobs: run: | python -m pytest -q -ra --disable-warnings -vv ${{ env.TEST_SCOPE }} working-directory: ${{ env.tests-path }} + env: + SYCL_QUEUE_THREAD_POOL_SIZE: 6 upload: name: Upload ['${{ matrix.os }}', python='${{ matrix.python }}'] diff --git a/.github/workflows/generate_coverage.yaml b/.github/workflows/generate_coverage.yaml index bd3c8c366f0e..75feca67e560 100644 --- a/.github/workflows/generate_coverage.yaml +++ b/.github/workflows/generate_coverage.yaml @@ -66,6 +66,7 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} COVERALLS_PARALLEL: true + SYCL_QUEUE_THREAD_POOL_SIZE: 6 coveralls: name: Indicate completion to coveralls.io diff --git a/dpnp/CMakeLists.txt b/dpnp/CMakeLists.txt index 54be4eb23b9a..89524ab1c58d 100644 --- a/dpnp/CMakeLists.txt +++ b/dpnp/CMakeLists.txt @@ -48,6 +48,7 @@ endfunction() build_dpnp_cython_ext_with_backend(dparray ${CMAKE_CURRENT_SOURCE_DIR}/dparray.pyx dpnp) add_subdirectory(backend) add_subdirectory(backend/extensions/lapack) +add_subdirectory(backend/extensions/vm) add_subdirectory(dpnp_algo) add_subdirectory(dpnp_utils) diff --git a/dpnp/backend/extensions/vm/CMakeLists.txt b/dpnp/backend/extensions/vm/CMakeLists.txt new file mode 100644 index 000000000000..8f3086ec3a9e --- /dev/null +++ b/dpnp/backend/extensions/vm/CMakeLists.txt @@ -0,0 +1,75 @@ +# ***************************************************************************** +# Copyright (c) 2023, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. +# ***************************************************************************** + + +set(python_module_name _vm_impl) +pybind11_add_module(${python_module_name} MODULE + vm_py.cpp + div.cpp +) + +if (WIN32) + if (${CMAKE_VERSION} VERSION_LESS "3.27") + # this is a work-around for target_link_options inserting option after -link option, cause + # linker to ignore it. + set(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -fsycl-device-code-split=per_kernel") + endif() +endif() + +set_target_properties(${python_module_name} PROPERTIES CMAKE_POSITION_INDEPENDENT_CODE ON) + +target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include) +target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src) + +target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS}) +target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) + +if (WIN32) + target_compile_options(${python_module_name} PRIVATE + /clang:-fno-approx-func + /clang:-fno-finite-math-only + ) +else() + target_compile_options(${python_module_name} PRIVATE + -fno-approx-func + -fno-finite-math-only + ) +endif() + +target_link_options(${python_module_name} PUBLIC -fsycl-device-code-split=per_kernel) +if (UNIX) + # this option is support on Linux only + target_link_options(${python_module_name} PUBLIC -fsycl-link-huge-device-code) +endif() + +if (DPNP_GENERATE_COVERAGE) + target_link_options(${python_module_name} PRIVATE -fprofile-instr-generate -fcoverage-mapping) +endif() + +target_link_libraries(${python_module_name} PUBLIC MKL::MKL_DPCPP) + +install(TARGETS ${python_module_name} + DESTINATION "dpnp/backend/extensions/vm" +) diff --git a/dpnp/backend/extensions/vm/div.cpp b/dpnp/backend/extensions/vm/div.cpp new file mode 100644 index 000000000000..28fbe1cdf3cc --- /dev/null +++ b/dpnp/backend/extensions/vm/div.cpp @@ -0,0 +1,324 @@ +//***************************************************************************** +// Copyright (c) 2023, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + + +#include + +// dpctl tensor headers +#include "utils/memory_overlap.hpp" +#include "utils/type_utils.hpp" + +#include "div.hpp" +#include "types_matrix.hpp" + +#include "dpnp_utils.hpp" + + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace vm +{ + +namespace mkl_vm = oneapi::mkl::vm; +namespace py = pybind11; +namespace type_utils = dpctl::tensor::type_utils; + +typedef sycl::event (*div_impl_fn_ptr_t)( + sycl::queue, const std::int64_t, const char*, const char*, char*, const std::vector&); + +static div_impl_fn_ptr_t div_dispatch_vector[dpctl_td_ns::num_types]; + +template +static sycl::event div_impl(sycl::queue exec_q, + const std::int64_t n, + const char* in_a, + const char* in_b, + char* out_y, + const std::vector& depends) +{ + type_utils::validate_type_for_device(exec_q); + + const T* a = reinterpret_cast(in_a); + const T* b = reinterpret_cast(in_b); + T* y = reinterpret_cast(out_y); + + return mkl_vm::div(exec_q, + n, // number of elements to be calculated + a, // pointer `a` containing 1st input vector of size n + b, // pointer `b` containing 2nd input vector of size n + y, // pointer `y` to the output vector of size n + depends); +} + +std::pair div(sycl::queue exec_q, + dpctl::tensor::usm_ndarray src1, + dpctl::tensor::usm_ndarray src2, + dpctl::tensor::usm_ndarray dst, // dst = op(src1, src2), elementwise + const std::vector& depends) +{ + // check type_nums + int src1_typenum = src1.get_typenum(); + int src2_typenum = src2.get_typenum(); + int dst_typenum = dst.get_typenum(); + + auto array_types = dpctl_td_ns::usm_ndarray_types(); + int src1_typeid = array_types.typenum_to_lookup_id(src1_typenum); + int src2_typeid = array_types.typenum_to_lookup_id(src2_typenum); + int dst_typeid = array_types.typenum_to_lookup_id(dst_typenum); + + if (src1_typeid != src2_typeid || src2_typeid != dst_typeid) + { + throw py::value_error("Either any of input arrays or output array have different types"); + } + + // check that queues are compatible + if (!dpctl::utils::queues_are_compatible(exec_q, {src1, src2, dst})) + { + throw py::value_error("Execution queue is not compatible with allocation queues"); + } + + // check shapes, broadcasting is assumed done by caller + // check that dimensions are the same + int dst_nd = dst.get_ndim(); + if (dst_nd != src1.get_ndim() || dst_nd != src2.get_ndim()) + { + throw py::value_error("Array dimensions are not the same."); + } + + // check that shapes are the same + const py::ssize_t* src1_shape = src1.get_shape_raw(); + const py::ssize_t* src2_shape = src2.get_shape_raw(); + const py::ssize_t* dst_shape = dst.get_shape_raw(); + bool shapes_equal(true); + size_t src_nelems(1); + + for (int i = 0; i < dst_nd; ++i) + { + src_nelems *= static_cast(src1_shape[i]); + shapes_equal = shapes_equal && (src1_shape[i] == dst_shape[i] && src2_shape[i] == dst_shape[i]); + } + if (!shapes_equal) + { + throw py::value_error("Array shapes are not the same."); + } + + // if nelems is zero, return + if (src_nelems == 0) + { + return std::make_pair(sycl::event(), sycl::event()); + } + + // ensure that output is ample enough to accomodate all elements + auto dst_offsets = dst.get_minmax_offsets(); + // destination must be ample enough to accomodate all elements + { + size_t range = static_cast(dst_offsets.second - dst_offsets.first); + if (range + 1 < src_nelems) + { + throw py::value_error( + "Destination array can not accomodate all the " + "elements of source array."); + } + } + + // check memory overlap + auto const& overlap = dpctl::tensor::overlap::MemoryOverlap(); + if (overlap(src1, dst) || overlap(src2, dst)) + { + throw py::value_error("Arrays index overlapping segments of memory"); + } + + const char* src1_data = src1.get_data(); + const char* src2_data = src2.get_data(); + char* dst_data = dst.get_data(); + + // handle contiguous inputs + bool is_src1_c_contig = src1.is_c_contiguous(); + bool is_src2_c_contig = src2.is_c_contiguous(); + bool is_dst_c_contig = dst.is_c_contiguous(); + + bool all_c_contig = (is_src1_c_contig && is_src2_c_contig && is_dst_c_contig); + if (!all_c_contig) + { + throw py::value_error("Input and outpur arrays must be C-contiguous"); + } + + auto div_fn = div_dispatch_vector[dst_typeid]; + if (div_fn == nullptr) + { + throw py::value_error("No div implementation defined"); + } + sycl::event sum_ev = div_fn(exec_q, src_nelems, src1_data, src2_data, dst_data, depends); + + sycl::event ht_ev = dpctl::utils::keep_args_alive(exec_q, {src1, src2, dst}, {sum_ev}); + return std::make_pair(ht_ev, sum_ev); + return std::make_pair(sycl::event(), sycl::event()); +} + +bool can_call_div(sycl::queue exec_q, + dpctl::tensor::usm_ndarray src1, + dpctl::tensor::usm_ndarray src2, + dpctl::tensor::usm_ndarray dst) +{ +#if INTEL_MKL_VERSION >= 20230002 + // check type_nums + int src1_typenum = src1.get_typenum(); + int src2_typenum = src2.get_typenum(); + int dst_typenum = dst.get_typenum(); + + auto array_types = dpctl_td_ns::usm_ndarray_types(); + int src1_typeid = array_types.typenum_to_lookup_id(src1_typenum); + int src2_typeid = array_types.typenum_to_lookup_id(src2_typenum); + int dst_typeid = array_types.typenum_to_lookup_id(dst_typenum); + + // types must be the same + if (src1_typeid != src2_typeid || src2_typeid != dst_typeid) + { + return false; + } + + // OneMKL VM functions perform a copy on host if no double type support + if (!exec_q.get_device().has(sycl::aspect::fp64)) + { + return false; + } + + // check that queues are compatible + if (!dpctl::utils::queues_are_compatible(exec_q, {src1, src2, dst})) + { + return false; + } + + // dimensions must be the same + int dst_nd = dst.get_ndim(); + if (dst_nd != src1.get_ndim() || dst_nd != src2.get_ndim()) + { + return false; + } + else if (dst_nd == 0) + { + // don't call OneMKL for 0d arrays + return false; + } + + // shapes must be the same + const py::ssize_t* src1_shape = src1.get_shape_raw(); + const py::ssize_t* src2_shape = src2.get_shape_raw(); + const py::ssize_t* dst_shape = dst.get_shape_raw(); + bool shapes_equal(true); + size_t src_nelems(1); + + for (int i = 0; i < dst_nd; ++i) + { + src_nelems *= static_cast(src1_shape[i]); + shapes_equal = shapes_equal && (src1_shape[i] == dst_shape[i] && src2_shape[i] == dst_shape[i]); + } + if (!shapes_equal) + { + return false; + } + + // if nelems is zero, return false + if (src_nelems == 0) + { + return false; + } + + // ensure that output is ample enough to accomodate all elements + auto dst_offsets = dst.get_minmax_offsets(); + // destination must be ample enough to accomodate all elements + { + size_t range = static_cast(dst_offsets.second - dst_offsets.first); + if (range + 1 < src_nelems) + { + return false; + } + } + + // check memory overlap + auto const& overlap = dpctl::tensor::overlap::MemoryOverlap(); + if (overlap(src1, dst) || overlap(src2, dst)) + { + return false; + } + + // suppport only contiguous inputs + bool is_src1_c_contig = src1.is_c_contiguous(); + bool is_src2_c_contig = src2.is_c_contiguous(); + bool is_dst_c_contig = dst.is_c_contiguous(); + + bool all_c_contig = (is_src1_c_contig && is_src2_c_contig && is_dst_c_contig); + if (!all_c_contig) + { + return false; + } + + // MKL function is not defined for the type + if (div_dispatch_vector[src1_typeid] == nullptr) + { + return false; + } + return true; +#else + // In OneMKL 2023.1.0 the call of oneapi::mkl::vm::div() is going to dead lock + // inside ~usm_wrapper_to_host()->{...; q_->wait_and_throw(); ...} + + (void)exec_q; + (void)src1; + (void)src2; + (void)dst; + return false; +#endif // INTEL_MKL_VERSION >= 20230002 +} + +template +struct DivContigFactory +{ + fnT get() + { + if constexpr (types::DivTypePairSupportFactory::is_defined) + { + return div_impl; + } + else + { + return nullptr; + } + } +}; + +void init_div_dispatch_vector(void) +{ + dpctl_td_ns::DispatchVectorBuilder contig; + contig.populate_dispatch_vector(div_dispatch_vector); +} +} // namespace vm +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/vm/div.hpp b/dpnp/backend/extensions/vm/div.hpp new file mode 100644 index 000000000000..d530a156aa4a --- /dev/null +++ b/dpnp/backend/extensions/vm/div.hpp @@ -0,0 +1,57 @@ +//***************************************************************************** +// Copyright (c) 2023, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include +#include + +#include +#include + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace vm +{ + extern std::pair div(sycl::queue exec_q, + dpctl::tensor::usm_ndarray src1, + dpctl::tensor::usm_ndarray src2, + dpctl::tensor::usm_ndarray dst, + const std::vector& depends); + + extern bool can_call_div(sycl::queue exec_q, + dpctl::tensor::usm_ndarray src1, + dpctl::tensor::usm_ndarray src2, + dpctl::tensor::usm_ndarray dst); + + extern void init_div_dispatch_vector(void); +} // namespace vm +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/vm/types_matrix.hpp b/dpnp/backend/extensions/vm/types_matrix.hpp new file mode 100644 index 000000000000..af7c9c47e731 --- /dev/null +++ b/dpnp/backend/extensions/vm/types_matrix.hpp @@ -0,0 +1,67 @@ +//***************************************************************************** +// Copyright (c) 2023, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include + +// dpctl tensor headers +#include "utils/type_dispatch.hpp" + +// dpctl namespace for operations with types +namespace dpctl_td_ns = dpctl::tensor::type_dispatch; + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace vm +{ +namespace types +{ + /** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::div function. + * + * @tparam T Type of input vectors `a` and `b` and of result vector `y`. + */ + template + struct DivTypePairSupportFactory + { + static constexpr bool is_defined = std::disjunction< + dpctl_td_ns::TypePairDefinedEntry, T, std::complex>, + dpctl_td_ns::TypePairDefinedEntry, T, std::complex>, + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + // fall-through + dpctl_td_ns::NotDefinedEntry>::is_defined; + }; +} // namespace types +} // namespace vm +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/vm/vm_py.cpp b/dpnp/backend/extensions/vm/vm_py.cpp new file mode 100644 index 000000000000..9a040b1f59b0 --- /dev/null +++ b/dpnp/backend/extensions/vm/vm_py.cpp @@ -0,0 +1,71 @@ +//***************************************************************************** +// Copyright (c) 2023, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +// This file defines functions of dpnp.backend._lapack_impl extensions +// +//***************************************************************************** + +#include +#include + +#include "div.hpp" + +namespace vm_ext = dpnp::backend::ext::vm; +namespace py = pybind11; + +// populate dispatch vectors +void init_dispatch_vectors(void) +{ + vm_ext::init_div_dispatch_vector(); +} + +// populate dispatch tables +void init_dispatch_tables(void) +{ +} + +PYBIND11_MODULE(_vm_impl, m) +{ + init_dispatch_vectors(); + init_dispatch_tables(); + + m.def("_div", + &vm_ext::div, + "Call `div` from OneMKL VM library to performs element by element division " + "of vector `src1` by vector `src2` to resulting vector `dst`", + py::arg("sycl_queue"), + py::arg("src1"), + py::arg("src2"), + py::arg("dst"), + py::arg("depends") = py::list()); + + m.def("_can_call_div", + &vm_ext::can_call_div, + "Check input arrays to answer if `div` function from OneMKL VM library can be used", + py::arg("sycl_queue"), + py::arg("src1"), + py::arg("src2"), + py::arg("dst")); +} diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd index 56195613a338..0120aa6b453a 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pxd +++ b/dpnp/dpnp_algo/dpnp_algo.pxd @@ -118,7 +118,6 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_DIAGONAL DPNP_FN_DIAGONAL_EXT DPNP_FN_DIVIDE - DPNP_FN_DIVIDE_EXT DPNP_FN_DOT DPNP_FN_DOT_EXT DPNP_FN_EDIFF1D @@ -531,8 +530,6 @@ cpdef dpnp_descriptor dpnp_add(dpnp_descriptor x1_obj, dpnp_descriptor x2_obj, o dpnp_descriptor out=*, object where=*) cpdef dpnp_descriptor dpnp_arctan2(dpnp_descriptor x1_obj, dpnp_descriptor x2_obj, object dtype=*, dpnp_descriptor out=*, object where=*) -cpdef dpnp_descriptor dpnp_divide(dpnp_descriptor x1_obj, dpnp_descriptor x2_obj, object dtype=*, - dpnp_descriptor out=*, object where=*) cpdef dpnp_descriptor dpnp_hypot(dpnp_descriptor x1_obj, dpnp_descriptor x2_obj, object dtype=*, dpnp_descriptor out=*, object where=*) cpdef dpnp_descriptor dpnp_maximum(dpnp_descriptor x1_obj, dpnp_descriptor x2_obj, object dtype=*, diff --git a/dpnp/dpnp_algo/dpnp_algo.pyx b/dpnp/dpnp_algo/dpnp_algo.pyx index 24abd1b4b9e4..8b6d4be73e94 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pyx +++ b/dpnp/dpnp_algo/dpnp_algo.pyx @@ -499,7 +499,7 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name, # get FPTR function and return type cdef fptr_2in_1out_strides_t func = NULL cdef DPNPFuncType return_type = DPNP_FT_NONE - if fptr_name != DPNP_FN_DIVIDE_EXT or result_sycl_device.has_aspect_fp64: + if result_sycl_device.has_aspect_fp64: return_type = kernel_data.return_type func = < fptr_2in_1out_strides_t > kernel_data.ptr else: diff --git a/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi b/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi index b5534c7c1a8e..3a002fdd4ba7 100644 --- a/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi +++ b/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi @@ -47,7 +47,6 @@ __all__ += [ "dpnp_cumprod", "dpnp_cumsum", "dpnp_diff", - "dpnp_divide", "dpnp_ediff1d", "dpnp_fabs", "dpnp_floor", @@ -249,14 +248,6 @@ cpdef utils.dpnp_descriptor dpnp_diff(utils.dpnp_descriptor x1, int n): return dpnp_diff(res, n - 1) -cpdef utils.dpnp_descriptor dpnp_divide(utils.dpnp_descriptor x1_obj, - utils.dpnp_descriptor x2_obj, - object dtype=None, - utils.dpnp_descriptor out=None, - object where=True): - return call_fptr_2in_1out_strides(DPNP_FN_DIVIDE_EXT, x1_obj, x2_obj, dtype, out, where) - - cpdef utils.dpnp_descriptor dpnp_ediff1d(utils.dpnp_descriptor x1): if x1.size <= 1: diff --git a/dpnp/dpnp_algo/dpnp_elementwise_common.py b/dpnp/dpnp_algo/dpnp_elementwise_common.py new file mode 100644 index 000000000000..9c2383e57a0b --- /dev/null +++ b/dpnp/dpnp_algo/dpnp_elementwise_common.py @@ -0,0 +1,90 @@ +# cython: language_level=3 +# distutils: language = c++ +# -*- coding: utf-8 -*- +# ***************************************************************************** +# Copyright (c) 2023, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. +# ***************************************************************************** + + +import dpnp +from dpnp.dpnp_array import dpnp_array +import dpnp.backend.extensions.vm._vm_impl as vmi + +from dpctl.tensor._elementwise_common import ( + BinaryElementwiseFunc +) +import dpctl.tensor._tensor_impl as ti + + +__all__ = [ + "dpnp_divide" +] + + +_divide_docstring_ = """ +divide(x1, x2, out=None, order='K') + +Calculates the ratio for each element `x1_i` of the input array `x1` with +the respective element `x2_i` of the input array `x2`. + +Args: + x1 (dpnp.ndarray): + First input array, expected to have numeric data type. + x2 (dpnp.ndarray): + Second input array, also expected to have numeric data type. + out ({None, dpnp.ndarray}, optional): + Output array to populate. + Array have the correct shape and the expected data type. + order ("C","F","A","K", None, optional): + Memory layout of the newly output array, if parameter `out` is `None`. + Default: "K". +Returns: + dpnp.ndarray: + an array containing the result of element-wise division. The data type + of the returned array is determined by the Type Promotion Rules. +""" + +def dpnp_divide(x1, x2, out=None, order='K'): + """ + Invokes div() function from pybind11 extension of OneMKL VM if possible. + Otherwise fully relies on dpctl.tensor implementation for divide() function. + + """ + + def _call_divide(src1, src2, dst, sycl_queue, depends=[]): + """A callback to register in BinaryElementwiseFunc class of dpctl.tensor""" + + if vmi._can_call_div(sycl_queue, src1, src2, dst): + # call pybind11 extension for div() function from OneMKL VM + return vmi._div(sycl_queue, src1, src2, dst, depends) + return ti._divide(src1, src2, dst, sycl_queue, depends) + + # dpctl.tensor only works with usm_ndarray or scalar + x1_usm_or_scalar = dpnp.get_usm_ndarray_or_scalar(x1) + x2_usm_or_scalar = dpnp.get_usm_ndarray_or_scalar(x2) + out_usm = None if out is None else dpnp.get_usm_ndarray(out) + + func = BinaryElementwiseFunc("divide", ti._divide_result_type, _call_divide, _divide_docstring_) + res_usm = func(x1_usm_or_scalar, x2_usm_or_scalar, out=out_usm, order=order) + return dpnp_array._create_from_usm_ndarray(res_usm) diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py index ce3c540539d6..93629440e7d2 100644 --- a/dpnp/dpnp_iface.py +++ b/dpnp/dpnp_iface.py @@ -69,6 +69,7 @@ "get_include", "get_normalized_queue_device", "get_usm_ndarray", + "get_usm_ndarray_or_scalar", "is_supported_array_type" ] @@ -404,6 +405,32 @@ def get_usm_ndarray(a): raise TypeError("An array must be any of supported type, but got {}".format(type(a))) +def get_usm_ndarray_or_scalar(a): + """ + Return scalar or :class:`dpctl.tensor.usm_ndarray` from input object `a`. + + Parameters + ---------- + a : {scalar, dpnp_array, usm_ndarray} + Input of any supported type: scalar, :class:`dpnp.ndarray` + or :class:`dpctl.tensor.usm_ndarray`. + + Returns + ------- + out : scalar, usm_ndarray + A scalar if the input `a` is scalar. + A dpctl USM ndarray if the input `a` is array. + + Raises + ------ + TypeError + If input parameter `a` is of unsupported object type. + + """ + + return a if isscalar(a) else get_usm_ndarray(a) + + def is_supported_array_type(a): """ Return ``True`` if an array of either type :class:`dpnp.ndarray` diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index 98dcc71d31af..9e877703b409 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -40,8 +40,11 @@ """ -from dpnp.dpnp_algo import * -from dpnp.dpnp_utils import * +from .dpnp_algo import * +from .dpnp_algo.dpnp_elementwise_common import ( + dpnp_divide +) +from .dpnp_utils import * import dpnp @@ -586,6 +589,7 @@ def divide(x1, out=None, *, where=True, + order='K', dtype=None, subok=True, **kwargs): @@ -617,28 +621,26 @@ def divide(x1, """ - if out is not None: - pass - elif where is not True: + if where is not True: pass elif dtype is not None: pass elif subok is not True: pass + elif kwargs: + pass elif dpnp.isscalar(x1) and dpnp.isscalar(x2): # at least either x1 or x2 has to be an array pass else: - # get USM type and queue to copy scalar from the host memory into a USM allocation - usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, - alloc_usm_type=usm_type, alloc_queue=queue) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, - alloc_usm_type=usm_type, alloc_queue=queue) - if x1_desc and x2_desc: - return dpnp_divide(x1_desc, x2_desc, dtype=dtype, out=out, where=where).get_pyobj() + if order in "afkcAFKC": + order = order.upper() + elif order is None: + order = 'K' + else: + raise ValueError("order must be one of 'C', 'F', 'A', or 'K' (got '{}')".format(order)) + return dpnp_divide(x1, x2, out=out, order=order) return call_origin(numpy.divide, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs) diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py index 5f0d73b23b7b..6224eb57ab81 100644 --- a/tests/test_mathematical.py +++ b/tests/test_mathematical.py @@ -284,7 +284,7 @@ def test_divide_scalar(shape, dtype): result = 0.5 / dpnp_a / 1.7 expected = 0.5 / np_a / 1.7 - assert_allclose(result, expected) + assert_allclose(result, expected, rtol=1e-6) @pytest.mark.parametrize("shape", diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index e5e53646e1a1..3182003a90be 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -6,6 +6,9 @@ import dpnp import dpctl +from dpctl.utils import ( + ExecutionPlacementError +) import numpy from numpy.testing import ( @@ -431,7 +434,7 @@ def test_broadcasting(func, data1, data2, device): def test_2in_1out_diff_queue_but_equal_context(func, device): x1 = dpnp.arange(10) x2 = dpnp.arange(10, sycl_queue=dpctl.SyclQueue(device))[::-1] - with assert_raises(ValueError): + with assert_raises((ValueError, ExecutionPlacementError)): getattr(dpnp, func)(x1, x2) diff --git a/tests/third_party/cupy/math_tests/test_arithmetic.py b/tests/third_party/cupy/math_tests/test_arithmetic.py index 39dc3e10f721..71f33429c704 100644 --- a/tests/third_party/cupy/math_tests/test_arithmetic.py +++ b/tests/third_party/cupy/math_tests/test_arithmetic.py @@ -168,13 +168,6 @@ def check_binary(self, xp): y = y.astype(dtype1) elif is_array_arg2 and not is_array_arg1: y = y.astype(dtype2) - elif self.name in ('divide', 'true_divide'): - # If one input is an array of float32 and another - an integer or floating scalar, - # NumPy will return an output array of float32, while DPNP will return the array of float64, - # since NumPy would use the same float64 type when instead of scalar here is array of integer of floating type. - if not (is_array_arg1 and is_array_arg2): - if (is_array_arg1 and arg1.dtype == numpy.float32) ^ (is_array_arg2 and arg2.dtype == numpy.float32): - y = y.astype(numpy.float32) # NumPy returns different values (nan/inf) on division by zero # depending on the architecture.