From a8e2afbbd045eed8e02a90c4834883f3eb2dc099 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Tue, 4 Jun 2024 14:36:41 +0200 Subject: [PATCH 01/35] Preparation to reuse common dpctl f/w for VM functions --- .../elementwise_functions.hpp | 820 ++++++++++++++++++ .../elementwise_functions_type_utils.hpp | 88 ++ .../simplify_iteration_space.hpp | 418 +++++++++ dpnp/backend/extensions/vm/abs.hpp | 43 +- dpnp/backend/extensions/vm/common.hpp | 19 + dpnp/backend/extensions/vm/vm_py.cpp | 36 +- 6 files changed, 1405 insertions(+), 19 deletions(-) create mode 100644 dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp create mode 100644 dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.hpp create mode 100644 dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.hpp diff --git a/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp b/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp new file mode 100644 index 000000000000..5b2dbd74fd8c --- /dev/null +++ b/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp @@ -0,0 +1,820 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include "dpctl4pybind11.hpp" +#include + +#include "elementwise_functions_type_utils.hpp" +#include "simplify_iteration_space.hpp" + +// dpctl tensor headers +#include "kernels/alignment.hpp" +// #include "kernels/dpctl_tensor_types.hpp" +// #include "utils/memory_overlap.hpp" +#include "utils/offset_utils.hpp" +#include "utils/output_validation.hpp" +#include "utils/type_dispatch.hpp" + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +static_assert(std::is_same_v); + +namespace dpnp::backend::ext::py_internal +{ + +using dpctl::tensor::kernels::alignment_utils::is_aligned; +using dpctl::tensor::kernels::alignment_utils::required_alignment; + +/*! 
@brief Template implementing Python API for unary elementwise functions */ +template +std::pair + py_unary_ufunc(const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &q, + const std::vector &depends, + // + const output_typesT &output_type_vec, + const contig_dispatchT &contig_dispatch_vector, + const strided_dispatchT &strided_dispatch_vector) +{ + int src_typenum = src.get_typenum(); + int dst_typenum = dst.get_typenum(); + + const auto &array_types = td_ns::usm_ndarray_types(); + int src_typeid = array_types.typenum_to_lookup_id(src_typenum); + int dst_typeid = array_types.typenum_to_lookup_id(dst_typenum); + + int func_output_typeid = output_type_vec[src_typeid]; + + // check that types are supported + if (dst_typeid != func_output_typeid) { + throw py::value_error( + "Destination array has unexpected elemental data type."); + } + + // check that queues are compatible + if (!dpctl::utils::queues_are_compatible(q, {src, dst})) { + throw py::value_error( + "Execution queue is not compatible with allocation queues"); + } + + dpctl::tensor::validation::CheckWritable::throw_if_not_writable(dst); + + // check that dimensions are the same + int src_nd = src.get_ndim(); + if (src_nd != dst.get_ndim()) { + throw py::value_error("Array dimensions are not the same."); + } + + // check that shapes are the same + const py::ssize_t *src_shape = src.get_shape_raw(); + const py::ssize_t *dst_shape = dst.get_shape_raw(); + bool shapes_equal(true); + size_t src_nelems(1); + + for (int i = 0; i < src_nd; ++i) { + src_nelems *= static_cast(src_shape[i]); + shapes_equal = shapes_equal && (src_shape[i] == dst_shape[i]); + } + if (!shapes_equal) { + throw py::value_error("Array shapes are not the same."); + } + + // if nelems is zero, return + if (src_nelems == 0) { + return std::make_pair(sycl::event(), sycl::event()); + } + + dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(dst, src_nelems); + + // check memory overlap + auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); + auto const &same_logical_tensors = + dpctl::tensor::overlap::SameLogicalTensors(); + if (overlap(src, dst) && !same_logical_tensors(src, dst)) { + throw py::value_error("Arrays index overlapping segments of memory"); + } + + const char *src_data = src.get_data(); + char *dst_data = dst.get_data(); + + // handle contiguous inputs + bool is_src_c_contig = src.is_c_contiguous(); + bool is_src_f_contig = src.is_f_contiguous(); + + bool is_dst_c_contig = dst.is_c_contiguous(); + bool is_dst_f_contig = dst.is_f_contiguous(); + + bool both_c_contig = (is_src_c_contig && is_dst_c_contig); + bool both_f_contig = (is_src_f_contig && is_dst_f_contig); + + if (both_c_contig || both_f_contig) { + auto contig_fn = contig_dispatch_vector[src_typeid]; + + if (contig_fn == nullptr) { + throw std::runtime_error( + "Contiguous implementation is missing for src_typeid=" + + std::to_string(src_typeid)); + } + + auto comp_ev = contig_fn(q, src_nelems, src_data, dst_data, depends); + sycl::event ht_ev = + dpctl::utils::keep_args_alive(q, {src, dst}, {comp_ev}); + + return std::make_pair(ht_ev, comp_ev); + } + + // simplify iteration space + // if 1d with strides 1 - input is contig + // dispatch to strided + + auto const &src_strides = src.get_strides_vector(); + auto const &dst_strides = dst.get_strides_vector(); + + using shT = std::vector; + shT simplified_shape; + shT simplified_src_strides; + shT simplified_dst_strides; + py::ssize_t src_offset(0); + py::ssize_t dst_offset(0); + + int nd = 
src_nd; + const py::ssize_t *shape = src_shape; + + simplify_iteration_space(nd, shape, src_strides, dst_strides, + // output + simplified_shape, simplified_src_strides, + simplified_dst_strides, src_offset, dst_offset); + + if (nd == 1 && simplified_src_strides[0] == 1 && + simplified_dst_strides[0] == 1) { + // Special case of contiguous data + auto contig_fn = contig_dispatch_vector[src_typeid]; + + if (contig_fn == nullptr) { + throw std::runtime_error( + "Contiguous implementation is missing for src_typeid=" + + std::to_string(src_typeid)); + } + + int src_elem_size = src.get_elemsize(); + int dst_elem_size = dst.get_elemsize(); + auto comp_ev = + contig_fn(q, src_nelems, src_data + src_elem_size * src_offset, + dst_data + dst_elem_size * dst_offset, depends); + + sycl::event ht_ev = + dpctl::utils::keep_args_alive(q, {src, dst}, {comp_ev}); + + return std::make_pair(ht_ev, comp_ev); + } + + // Strided implementation + auto strided_fn = strided_dispatch_vector[src_typeid]; + + if (strided_fn == nullptr) { + throw std::runtime_error( + "Strided implementation is missing for src_typeid=" + + std::to_string(src_typeid)); + } + + using dpctl::tensor::offset_utils::device_allocate_and_pack; + + std::vector host_tasks{}; + host_tasks.reserve(2); + + const auto &ptr_size_event_triple_ = device_allocate_and_pack( + q, host_tasks, simplified_shape, simplified_src_strides, + simplified_dst_strides); + py::ssize_t *shape_strides = std::get<0>(ptr_size_event_triple_); + const sycl::event ©_shape_ev = std::get<2>(ptr_size_event_triple_); + + if (shape_strides == nullptr) { + throw std::runtime_error("Device memory allocation failed"); + } + + sycl::event strided_fn_ev = + strided_fn(q, src_nelems, nd, shape_strides, src_data, src_offset, + dst_data, dst_offset, depends, {copy_shape_ev}); + + // async free of shape_strides temporary + auto ctx = q.get_context(); + sycl::event tmp_cleanup_ev = q.submit([&](sycl::handler &cgh) { + cgh.depends_on(strided_fn_ev); + cgh.host_task( + [ctx, shape_strides]() { sycl::free(shape_strides, ctx); }); + }); + host_tasks.push_back(tmp_cleanup_ev); + + return std::make_pair( + dpctl::utils::keep_args_alive(q, {src, dst}, host_tasks), + strided_fn_ev); +} + +/*! @brief Template implementing Python API for querying of type support by + * unary elementwise functions */ +template +py::object py_unary_ufunc_result_type(const py::dtype &input_dtype, + const output_typesT &output_types) +{ + int tn = input_dtype.num(); // NumPy type numbers are the same as in dpctl + int src_typeid = -1; + + auto array_types = td_ns::usm_ndarray_types(); + + try { + src_typeid = array_types.typenum_to_lookup_id(tn); + } catch (const std::exception &e) { + throw py::value_error(e.what()); + } + + using type_utils::_result_typeid; + int dst_typeid = _result_typeid(src_typeid, output_types); + + if (dst_typeid < 0) { + auto res = py::none(); + return py::cast(res); + } + else { + using type_utils::_dtype_from_typenum; + + auto dst_typenum_t = static_cast(dst_typeid); + auto dt = _dtype_from_typenum(dst_typenum_t); + + return py::cast(dt); + } +} + +// ======================== Binary functions =========================== + +namespace +{ +template +bool isEqual(Container const &c, std::initializer_list const &l) +{ + return std::equal(std::begin(c), std::end(c), std::begin(l), std::end(l)); +} +} // namespace + +/*! 
@brief Template implementing Python API for binary elementwise + * functions */ +template +std::pair py_binary_ufunc( + const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, // dst = op(src1, src2), elementwise + sycl::queue &exec_q, + const std::vector depends, + // + const output_typesT &output_type_table, + const contig_dispatchT &contig_dispatch_table, + const strided_dispatchT &strided_dispatch_table, + const contig_matrix_row_dispatchT + &contig_matrix_row_broadcast_dispatch_table, + const contig_row_matrix_dispatchT + &contig_row_matrix_broadcast_dispatch_table) +{ + // check type_nums + int src1_typenum = src1.get_typenum(); + int src2_typenum = src2.get_typenum(); + int dst_typenum = dst.get_typenum(); + + auto array_types = td_ns::usm_ndarray_types(); + int src1_typeid = array_types.typenum_to_lookup_id(src1_typenum); + int src2_typeid = array_types.typenum_to_lookup_id(src2_typenum); + int dst_typeid = array_types.typenum_to_lookup_id(dst_typenum); + + int output_typeid = output_type_table[src1_typeid][src2_typeid]; + + if (output_typeid != dst_typeid) { + throw py::value_error( + "Destination array has unexpected elemental data type."); + } + + // check that queues are compatible + if (!dpctl::utils::queues_are_compatible(exec_q, {src1, src2, dst})) { + throw py::value_error( + "Execution queue is not compatible with allocation queues"); + } + + dpctl::tensor::validation::CheckWritable::throw_if_not_writable(dst); + + // check shapes, broadcasting is assumed done by caller + // check that dimensions are the same + int dst_nd = dst.get_ndim(); + if (dst_nd != src1.get_ndim() || dst_nd != src2.get_ndim()) { + throw py::value_error("Array dimensions are not the same."); + } + + // check that shapes are the same + const py::ssize_t *src1_shape = src1.get_shape_raw(); + const py::ssize_t *src2_shape = src2.get_shape_raw(); + const py::ssize_t *dst_shape = dst.get_shape_raw(); + bool shapes_equal(true); + size_t src_nelems(1); + + for (int i = 0; i < dst_nd; ++i) { + src_nelems *= static_cast(src1_shape[i]); + shapes_equal = shapes_equal && (src1_shape[i] == dst_shape[i] && + src2_shape[i] == dst_shape[i]); + } + if (!shapes_equal) { + throw py::value_error("Array shapes are not the same."); + } + + // if nelems is zero, return + if (src_nelems == 0) { + return std::make_pair(sycl::event(), sycl::event()); + } + + dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(dst, src_nelems); + + auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); + auto const &same_logical_tensors = + dpctl::tensor::overlap::SameLogicalTensors(); + if ((overlap(src1, dst) && !same_logical_tensors(src1, dst)) || + (overlap(src2, dst) && !same_logical_tensors(src2, dst))) + { + throw py::value_error("Arrays index overlapping segments of memory"); + } + // check memory overlap + const char *src1_data = src1.get_data(); + const char *src2_data = src2.get_data(); + char *dst_data = dst.get_data(); + + // handle contiguous inputs + bool is_src1_c_contig = src1.is_c_contiguous(); + bool is_src1_f_contig = src1.is_f_contiguous(); + + bool is_src2_c_contig = src2.is_c_contiguous(); + bool is_src2_f_contig = src2.is_f_contiguous(); + + bool is_dst_c_contig = dst.is_c_contiguous(); + bool is_dst_f_contig = dst.is_f_contiguous(); + + bool all_c_contig = + (is_src1_c_contig && is_src2_c_contig && is_dst_c_contig); + bool all_f_contig = + (is_src1_f_contig && is_src2_f_contig && is_dst_f_contig); + + // dispatch for contiguous inputs + 
if (all_c_contig || all_f_contig) { + auto contig_fn = contig_dispatch_table[src1_typeid][src2_typeid]; + + if (contig_fn != nullptr) { + auto comp_ev = contig_fn(exec_q, src_nelems, src1_data, 0, + src2_data, 0, dst_data, 0, depends); + sycl::event ht_ev = dpctl::utils::keep_args_alive( + exec_q, {src1, src2, dst}, {comp_ev}); + + return std::make_pair(ht_ev, comp_ev); + } + } + + // simplify strides + auto const &src1_strides = src1.get_strides_vector(); + auto const &src2_strides = src2.get_strides_vector(); + auto const &dst_strides = dst.get_strides_vector(); + + using shT = std::vector; + shT simplified_shape; + shT simplified_src1_strides; + shT simplified_src2_strides; + shT simplified_dst_strides; + py::ssize_t src1_offset(0); + py::ssize_t src2_offset(0); + py::ssize_t dst_offset(0); + + int nd = dst_nd; + const py::ssize_t *shape = src1_shape; + + simplify_iteration_space_3( + nd, shape, src1_strides, src2_strides, dst_strides, + // outputs + simplified_shape, simplified_src1_strides, simplified_src2_strides, + simplified_dst_strides, src1_offset, src2_offset, dst_offset); + + std::vector host_tasks{}; + if (nd < 3) { + static constexpr auto unit_stride = + std::initializer_list{1}; + + if ((nd == 1) && isEqual(simplified_src1_strides, unit_stride) && + isEqual(simplified_src2_strides, unit_stride) && + isEqual(simplified_dst_strides, unit_stride)) + { + auto contig_fn = contig_dispatch_table[src1_typeid][src2_typeid]; + + if (contig_fn != nullptr) { + auto comp_ev = contig_fn(exec_q, src_nelems, src1_data, + src1_offset, src2_data, src2_offset, + dst_data, dst_offset, depends); + sycl::event ht_ev = dpctl::utils::keep_args_alive( + exec_q, {src1, src2, dst}, {comp_ev}); + + return std::make_pair(ht_ev, comp_ev); + } + } + if (nd == 2) { + static constexpr auto zero_one_strides = + std::initializer_list{0, 1}; + static constexpr auto one_zero_strides = + std::initializer_list{1, 0}; + constexpr py::ssize_t one{1}; + // special case of C-contiguous matrix and a row + if (isEqual(simplified_src2_strides, zero_one_strides) && + isEqual(simplified_src1_strides, {simplified_shape[1], one}) && + isEqual(simplified_dst_strides, {simplified_shape[1], one})) + { + auto matrix_row_broadcast_fn = + contig_matrix_row_broadcast_dispatch_table[src1_typeid] + [src2_typeid]; + if (matrix_row_broadcast_fn != nullptr) { + int src1_itemsize = src1.get_elemsize(); + int src2_itemsize = src2.get_elemsize(); + int dst_itemsize = dst.get_elemsize(); + + if (is_aligned( + src1_data + src1_offset * src1_itemsize) && + is_aligned( + src2_data + src2_offset * src2_itemsize) && + is_aligned( + dst_data + dst_offset * dst_itemsize)) + { + size_t n0 = simplified_shape[0]; + size_t n1 = simplified_shape[1]; + sycl::event comp_ev = matrix_row_broadcast_fn( + exec_q, host_tasks, n0, n1, src1_data, src1_offset, + src2_data, src2_offset, dst_data, dst_offset, + depends); + + return std::make_pair( + dpctl::utils::keep_args_alive( + exec_q, {src1, src2, dst}, host_tasks), + comp_ev); + } + } + } + if (isEqual(simplified_src1_strides, one_zero_strides) && + isEqual(simplified_src2_strides, {one, simplified_shape[0]}) && + isEqual(simplified_dst_strides, {one, simplified_shape[0]})) + { + auto row_matrix_broadcast_fn = + contig_row_matrix_broadcast_dispatch_table[src1_typeid] + [src2_typeid]; + if (row_matrix_broadcast_fn != nullptr) { + + int src1_itemsize = src1.get_elemsize(); + int src2_itemsize = src2.get_elemsize(); + int dst_itemsize = dst.get_elemsize(); + + if (is_aligned( + src1_data + src1_offset * 
src1_itemsize) && + is_aligned( + src2_data + src2_offset * src2_itemsize) && + is_aligned( + dst_data + dst_offset * dst_itemsize)) + { + size_t n0 = simplified_shape[1]; + size_t n1 = simplified_shape[0]; + sycl::event comp_ev = row_matrix_broadcast_fn( + exec_q, host_tasks, n0, n1, src1_data, src1_offset, + src2_data, src2_offset, dst_data, dst_offset, + depends); + + return std::make_pair( + dpctl::utils::keep_args_alive( + exec_q, {src1, src2, dst}, host_tasks), + comp_ev); + } + } + } + } + } + + // dispatch to strided code + auto strided_fn = strided_dispatch_table[src1_typeid][src2_typeid]; + + if (strided_fn == nullptr) { + throw std::runtime_error( + "Strided implementation is missing for src1_typeid=" + + std::to_string(src1_typeid) + + " and src2_typeid=" + std::to_string(src2_typeid)); + } + + using dpctl::tensor::offset_utils::device_allocate_and_pack; + const auto &ptr_sz_event_triple_ = device_allocate_and_pack( + exec_q, host_tasks, simplified_shape, simplified_src1_strides, + simplified_src2_strides, simplified_dst_strides); + + py::ssize_t *shape_strides = std::get<0>(ptr_sz_event_triple_); + const sycl::event ©_shape_ev = std::get<2>(ptr_sz_event_triple_); + + if (shape_strides == nullptr) { + throw std::runtime_error("Unabled to allocate device memory"); + } + + sycl::event strided_fn_ev = strided_fn( + exec_q, src_nelems, nd, shape_strides, src1_data, src1_offset, + src2_data, src2_offset, dst_data, dst_offset, depends, {copy_shape_ev}); + + // async free of shape_strides temporary + auto ctx = exec_q.get_context(); + + sycl::event tmp_cleanup_ev = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(strided_fn_ev); + cgh.host_task( + [ctx, shape_strides]() { sycl::free(shape_strides, ctx); }); + }); + + host_tasks.push_back(tmp_cleanup_ev); + + return std::make_pair( + dpctl::utils::keep_args_alive(exec_q, {src1, src2, dst}, host_tasks), + strided_fn_ev); +} + +/*! 
@brief Type querying for binary elementwise functions */ +template +py::object py_binary_ufunc_result_type(const py::dtype &input1_dtype, + const py::dtype &input2_dtype, + const output_typesT &output_types_table) +{ + int tn1 = input1_dtype.num(); // NumPy type numbers are the same as in dpctl + int tn2 = input2_dtype.num(); // NumPy type numbers are the same as in dpctl + int src1_typeid = -1; + int src2_typeid = -1; + + auto array_types = td_ns::usm_ndarray_types(); + + try { + src1_typeid = array_types.typenum_to_lookup_id(tn1); + src2_typeid = array_types.typenum_to_lookup_id(tn2); + } catch (const std::exception &e) { + throw py::value_error(e.what()); + } + + if (src1_typeid < 0 || src1_typeid >= td_ns::num_types || src2_typeid < 0 || + src2_typeid >= td_ns::num_types) + { + throw std::runtime_error("binary output type lookup failed"); + } + int dst_typeid = output_types_table[src1_typeid][src2_typeid]; + + if (dst_typeid < 0) { + auto res = py::none(); + return py::cast(res); + } + else { + using type_utils::_dtype_from_typenum; + + auto dst_typenum_t = static_cast(dst_typeid); + auto dt = _dtype_from_typenum(dst_typenum_t); + + return py::cast(dt); + } +} + +// ==================== Inplace binary functions ======================= + +template +std::pair + py_binary_inplace_ufunc(const dpctl::tensor::usm_ndarray &lhs, + const dpctl::tensor::usm_ndarray &rhs, + sycl::queue &exec_q, + const std::vector depends, + // + const output_typesT &output_type_table, + const contig_dispatchT &contig_dispatch_table, + const strided_dispatchT &strided_dispatch_table, + const contig_row_matrix_dispatchT + &contig_row_matrix_broadcast_dispatch_table) +{ + dpctl::tensor::validation::CheckWritable::throw_if_not_writable(lhs); + + // check type_nums + int rhs_typenum = rhs.get_typenum(); + int lhs_typenum = lhs.get_typenum(); + + auto array_types = td_ns::usm_ndarray_types(); + int rhs_typeid = array_types.typenum_to_lookup_id(rhs_typenum); + int lhs_typeid = array_types.typenum_to_lookup_id(lhs_typenum); + + int output_typeid = output_type_table[rhs_typeid][lhs_typeid]; + + if (output_typeid != lhs_typeid) { + throw py::value_error( + "Left-hand side array has unexpected elemental data type."); + } + + // check that queues are compatible + if (!dpctl::utils::queues_are_compatible(exec_q, {rhs, lhs})) { + throw py::value_error( + "Execution queue is not compatible with allocation queues"); + } + + // check shapes, broadcasting is assumed done by caller + // check that dimensions are the same + int lhs_nd = lhs.get_ndim(); + if (lhs_nd != rhs.get_ndim()) { + throw py::value_error("Array dimensions are not the same."); + } + + // check that shapes are the same + const py::ssize_t *rhs_shape = rhs.get_shape_raw(); + const py::ssize_t *lhs_shape = lhs.get_shape_raw(); + bool shapes_equal(true); + size_t rhs_nelems(1); + + for (int i = 0; i < lhs_nd; ++i) { + rhs_nelems *= static_cast(rhs_shape[i]); + shapes_equal = shapes_equal && (rhs_shape[i] == lhs_shape[i]); + } + if (!shapes_equal) { + throw py::value_error("Array shapes are not the same."); + } + + // if nelems is zero, return + if (rhs_nelems == 0) { + return std::make_pair(sycl::event(), sycl::event()); + } + + dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(lhs, rhs_nelems); + + // check memory overlap + auto const &same_logical_tensors = + dpctl::tensor::overlap::SameLogicalTensors(); + auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); + if (overlap(rhs, lhs) && !same_logical_tensors(rhs, lhs)) { + throw 
py::value_error("Arrays index overlapping segments of memory"); + } + // check memory overlap + const char *rhs_data = rhs.get_data(); + char *lhs_data = lhs.get_data(); + + // handle contiguous inputs + bool is_rhs_c_contig = rhs.is_c_contiguous(); + bool is_rhs_f_contig = rhs.is_f_contiguous(); + + bool is_lhs_c_contig = lhs.is_c_contiguous(); + bool is_lhs_f_contig = lhs.is_f_contiguous(); + + bool both_c_contig = (is_rhs_c_contig && is_lhs_c_contig); + bool both_f_contig = (is_rhs_f_contig && is_lhs_f_contig); + + // dispatch for contiguous inputs + if (both_c_contig || both_f_contig) { + auto contig_fn = contig_dispatch_table[rhs_typeid][lhs_typeid]; + + if (contig_fn != nullptr) { + auto comp_ev = contig_fn(exec_q, rhs_nelems, rhs_data, 0, lhs_data, + 0, depends); + sycl::event ht_ev = + dpctl::utils::keep_args_alive(exec_q, {rhs, lhs}, {comp_ev}); + + return std::make_pair(ht_ev, comp_ev); + } + } + + // simplify strides + auto const &rhs_strides = rhs.get_strides_vector(); + auto const &lhs_strides = lhs.get_strides_vector(); + + using shT = std::vector; + shT simplified_shape; + shT simplified_rhs_strides; + shT simplified_lhs_strides; + py::ssize_t rhs_offset(0); + py::ssize_t lhs_offset(0); + + int nd = lhs_nd; + const py::ssize_t *shape = rhs_shape; + + simplify_iteration_space(nd, shape, rhs_strides, lhs_strides, + // outputs + simplified_shape, simplified_rhs_strides, + simplified_lhs_strides, rhs_offset, lhs_offset); + + std::vector host_tasks{}; + if (nd < 3) { + static constexpr auto unit_stride = + std::initializer_list{1}; + + if ((nd == 1) && isEqual(simplified_rhs_strides, unit_stride) && + isEqual(simplified_lhs_strides, unit_stride)) + { + auto contig_fn = contig_dispatch_table[rhs_typeid][lhs_typeid]; + + if (contig_fn != nullptr) { + auto comp_ev = + contig_fn(exec_q, rhs_nelems, rhs_data, rhs_offset, + lhs_data, lhs_offset, depends); + sycl::event ht_ev = dpctl::utils::keep_args_alive( + exec_q, {rhs, lhs}, {comp_ev}); + + return std::make_pair(ht_ev, comp_ev); + } + } + if (nd == 2) { + static constexpr auto one_zero_strides = + std::initializer_list{1, 0}; + constexpr py::ssize_t one{1}; + // special case of C-contiguous matrix and a row + if (isEqual(simplified_rhs_strides, one_zero_strides) && + isEqual(simplified_lhs_strides, {one, simplified_shape[0]})) + { + auto row_matrix_broadcast_fn = + contig_row_matrix_broadcast_dispatch_table[rhs_typeid] + [lhs_typeid]; + if (row_matrix_broadcast_fn != nullptr) { + size_t n0 = simplified_shape[1]; + size_t n1 = simplified_shape[0]; + sycl::event comp_ev = row_matrix_broadcast_fn( + exec_q, host_tasks, n0, n1, rhs_data, rhs_offset, + lhs_data, lhs_offset, depends); + + return std::make_pair(dpctl::utils::keep_args_alive( + exec_q, {lhs, rhs}, host_tasks), + comp_ev); + } + } + } + } + + // dispatch to strided code + auto strided_fn = strided_dispatch_table[rhs_typeid][lhs_typeid]; + + if (strided_fn == nullptr) { + throw std::runtime_error( + "Strided implementation is missing for rhs_typeid=" + + std::to_string(rhs_typeid) + + " and lhs_typeid=" + std::to_string(lhs_typeid)); + } + + using dpctl::tensor::offset_utils::device_allocate_and_pack; + const auto &ptr_sz_event_triple_ = device_allocate_and_pack( + exec_q, host_tasks, simplified_shape, simplified_rhs_strides, + simplified_lhs_strides); + + py::ssize_t *shape_strides = std::get<0>(ptr_sz_event_triple_); + const sycl::event ©_shape_ev = std::get<2>(ptr_sz_event_triple_); + + if (shape_strides == nullptr) { + throw std::runtime_error("Unabled to allocate device 
memory"); + } + + sycl::event strided_fn_ev = + strided_fn(exec_q, rhs_nelems, nd, shape_strides, rhs_data, rhs_offset, + lhs_data, lhs_offset, depends, {copy_shape_ev}); + + // async free of shape_strides temporary + auto ctx = exec_q.get_context(); + + sycl::event tmp_cleanup_ev = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(strided_fn_ev); + cgh.host_task( + [ctx, shape_strides]() { sycl::free(shape_strides, ctx); }); + }); + + host_tasks.push_back(tmp_cleanup_ev); + + return std::make_pair( + dpctl::utils::keep_args_alive(exec_q, {rhs, lhs}, host_tasks), + strided_fn_ev); +} + +} // namespace dpnp::backend::ext::py_internal diff --git a/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.hpp b/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.hpp new file mode 100644 index 000000000000..f3e43b34ba17 --- /dev/null +++ b/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.hpp @@ -0,0 +1,88 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include "dpctl4pybind11.hpp" +#include + +// dpctl tensor headers +#include "utils/type_dispatch.hpp" + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace dpnp::backend::ext::py_internal::type_utils +{ + +/*! 
@brief Produce dtype from a type number */ +py::dtype _dtype_from_typenum(td_ns::typenum_t dst_typenum_t) +{ + switch (dst_typenum_t) { + case td_ns::typenum_t::BOOL: + return py::dtype("?"); + case td_ns::typenum_t::INT8: + return py::dtype("i1"); + case td_ns::typenum_t::UINT8: + return py::dtype("u1"); + case td_ns::typenum_t::INT16: + return py::dtype("i2"); + case td_ns::typenum_t::UINT16: + return py::dtype("u2"); + case td_ns::typenum_t::INT32: + return py::dtype("i4"); + case td_ns::typenum_t::UINT32: + return py::dtype("u4"); + case td_ns::typenum_t::INT64: + return py::dtype("i8"); + case td_ns::typenum_t::UINT64: + return py::dtype("u8"); + case td_ns::typenum_t::HALF: + return py::dtype("f2"); + case td_ns::typenum_t::FLOAT: + return py::dtype("f4"); + case td_ns::typenum_t::DOUBLE: + return py::dtype("f8"); + case td_ns::typenum_t::CFLOAT: + return py::dtype("c8"); + case td_ns::typenum_t::CDOUBLE: + return py::dtype("c16"); + default: + throw py::value_error("Unrecognized dst_typeid"); + } +} + +/*! @brief Lookup typeid of the result from typeid of + * argument and the mapping table */ +int _result_typeid(int arg_typeid, const int *fn_output_id) +{ + if (arg_typeid < 0 || arg_typeid >= td_ns::num_types) { + throw py::value_error("Input typeid " + std::to_string(arg_typeid) + + " is outside of expected bounds."); + } + + return fn_output_id[arg_typeid]; +} +} // namespace dpnp::backend::ext::py_internal::type_utils diff --git a/dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.hpp b/dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.hpp new file mode 100644 index 000000000000..ae86d94924bd --- /dev/null +++ b/dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.hpp @@ -0,0 +1,418 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#pragma once + +#include + +#include + +namespace dpnp::backend::ext::py_internal +{ + +namespace py = pybind11; + +template +int simplify_iteration_two_strides(const int nd, + ShapeTy *shape, + StridesTy *strides1, + StridesTy *strides2, + StridesTy &disp1, + StridesTy &disp2) +{ + disp1 = StridesTy(0); + disp2 = StridesTy(0); + if (nd < 2) + return nd; + + std::vector pos(nd); + std::iota(pos.begin(), pos.end(), 0); + + std::stable_sort( + pos.begin(), pos.end(), [&strides1, &strides2, &shape](int i1, int i2) { + auto abs_str1_i1 = + (strides1[i1] < 0) ? -strides1[i1] : strides1[i1]; + auto abs_str1_i2 = + (strides1[i2] < 0) ? -strides1[i2] : strides1[i2]; + auto abs_str2_i1 = + (strides2[i1] < 0) ? -strides2[i1] : strides2[i1]; + auto abs_str2_i2 = + (strides2[i2] < 0) ? -strides2[i2] : strides2[i2]; + return (abs_str2_i1 > abs_str2_i2) || + (abs_str2_i1 == abs_str2_i2 && + (abs_str1_i1 > abs_str1_i2 || + (abs_str1_i1 == abs_str1_i2 && shape[i1] > shape[i2]))); + }); + + std::vector shape_w; + std::vector strides1_w; + std::vector strides2_w; + + bool contractable = true; + for (int i = 0; i < nd; ++i) { + auto p = pos[i]; + auto sh_p = shape[p]; + auto str1_p = strides1[p]; + auto str2_p = strides2[p]; + shape_w.push_back(sh_p); + if (str1_p <= 0 && str2_p <= 0 && std::min(str1_p, str2_p) < 0) { + disp1 += str1_p * (sh_p - 1); + str1_p = -str1_p; + disp2 += str2_p * (sh_p - 1); + str2_p = -str2_p; + } + if (str1_p < 0 || str2_p < 0) { + contractable = false; + } + strides1_w.push_back(str1_p); + strides2_w.push_back(str2_p); + } + + int nd_ = nd; + while (contractable) { + bool changed = false; + for (int i = 0; i + 1 < nd_; ++i) { + StridesTy str1 = strides1_w[i + 1]; + StridesTy str2 = strides2_w[i + 1]; + StridesTy jump1 = strides1_w[i] - (shape_w[i + 1] - 1) * str1; + StridesTy jump2 = strides2_w[i] - (shape_w[i + 1] - 1) * str2; + + if (jump1 == str1 && jump2 == str2) { + changed = true; + shape_w[i] *= shape_w[i + 1]; + for (int j = i; j < nd_; ++j) { + strides1_w[j] = strides1_w[j + 1]; + } + for (int j = i; j < nd_; ++j) { + strides2_w[j] = strides2_w[j + 1]; + } + for (int j = i + 1; j + 1 < nd_; ++j) { + shape_w[j] = shape_w[j + 1]; + } + --nd_; + break; + } + } + if (!changed) + break; + } + for (int i = 0; i < nd_; ++i) { + shape[i] = shape_w[i]; + } + for (int i = 0; i < nd_; ++i) { + strides1[i] = strides1_w[i]; + } + for (int i = 0; i < nd_; ++i) { + strides2[i] = strides2_w[i]; + } + + return nd_; +} + +template +int simplify_iteration_three_strides(const int nd, + ShapeTy *shape, + StridesTy *strides1, + StridesTy *strides2, + StridesTy *strides3, + StridesTy &disp1, + StridesTy &disp2, + StridesTy &disp3) +{ + disp1 = StridesTy(0); + disp2 = StridesTy(0); + if (nd < 2) + return nd; + + std::vector pos(nd); + std::iota(pos.begin(), pos.end(), 0); + + std::stable_sort(pos.begin(), pos.end(), + [&strides1, &strides2, &strides3, &shape](int i1, int i2) { + auto abs_str1_i1 = + (strides1[i1] < 0) ? -strides1[i1] : strides1[i1]; + auto abs_str1_i2 = + (strides1[i2] < 0) ? -strides1[i2] : strides1[i2]; + auto abs_str2_i1 = + (strides2[i1] < 0) ? -strides2[i1] : strides2[i1]; + auto abs_str2_i2 = + (strides2[i2] < 0) ? -strides2[i2] : strides2[i2]; + auto abs_str3_i1 = + (strides3[i1] < 0) ? -strides3[i1] : strides3[i1]; + auto abs_str3_i2 = + (strides3[i2] < 0) ? 
-strides3[i2] : strides3[i2]; + return (abs_str3_i1 > abs_str3_i2) || + ((abs_str3_i1 == abs_str3_i2) && + ((abs_str2_i1 > abs_str2_i2) || + ((abs_str2_i1 == abs_str2_i2) && + ((abs_str1_i1 > abs_str1_i2) || + ((abs_str1_i1 == abs_str1_i2) && + (shape[i1] > shape[i2])))))); + }); + + std::vector shape_w; + std::vector strides1_w; + std::vector strides2_w; + std::vector strides3_w; + + bool contractable = true; + for (int i = 0; i < nd; ++i) { + auto p = pos[i]; + auto sh_p = shape[p]; + auto str1_p = strides1[p]; + auto str2_p = strides2[p]; + auto str3_p = strides3[p]; + shape_w.push_back(sh_p); + if (str1_p <= 0 && str2_p <= 0 && str3_p <= 0 && + std::min({str1_p, str2_p, str3_p}) < 0) + { + disp1 += str1_p * (sh_p - 1); + str1_p = -str1_p; + disp2 += str2_p * (sh_p - 1); + str2_p = -str2_p; + disp3 += str3_p * (sh_p - 1); + str3_p = -str3_p; + } + if (str1_p < 0 || str2_p < 0 || str3_p < 0) { + contractable = false; + } + strides1_w.push_back(str1_p); + strides2_w.push_back(str2_p); + strides3_w.push_back(str3_p); + } + int nd_ = nd; + while (contractable) { + bool changed = false; + for (int i = 0; i + 1 < nd_; ++i) { + StridesTy str1 = strides1_w[i + 1]; + StridesTy str2 = strides2_w[i + 1]; + StridesTy str3 = strides3_w[i + 1]; + StridesTy jump1 = strides1_w[i] - (shape_w[i + 1] - 1) * str1; + StridesTy jump2 = strides2_w[i] - (shape_w[i + 1] - 1) * str2; + StridesTy jump3 = strides3_w[i] - (shape_w[i + 1] - 1) * str3; + + if (jump1 == str1 && jump2 == str2 && jump3 == str3) { + changed = true; + shape_w[i] *= shape_w[i + 1]; + for (int j = i; j < nd_; ++j) { + strides1_w[j] = strides1_w[j + 1]; + } + for (int j = i; j < nd_; ++j) { + strides2_w[j] = strides2_w[j + 1]; + } + for (int j = i; j < nd_; ++j) { + strides3_w[j] = strides3_w[j + 1]; + } + for (int j = i + 1; j + 1 < nd_; ++j) { + shape_w[j] = shape_w[j + 1]; + } + --nd_; + break; + } + } + if (!changed) + break; + } + for (int i = 0; i < nd_; ++i) { + shape[i] = shape_w[i]; + } + for (int i = 0; i < nd_; ++i) { + strides1[i] = strides1_w[i]; + } + for (int i = 0; i < nd_; ++i) { + strides2[i] = strides2_w[i]; + } + for (int i = 0; i < nd_; ++i) { + strides3[i] = strides3_w[i]; + } + + return nd_; +} + +void simplify_iteration_space(int &nd, + const py::ssize_t *const &shape, + std::vector const &src_strides, + std::vector const &dst_strides, + // output + std::vector &simplified_shape, + std::vector &simplified_src_strides, + std::vector &simplified_dst_strides, + py::ssize_t &src_offset, + py::ssize_t &dst_offset) +{ + if (nd > 1) { + // Simplify iteration space to reduce dimensionality + // and improve access pattern + simplified_shape.reserve(nd); + simplified_shape.insert(std::begin(simplified_shape), shape, + shape + nd); + assert(simplified_shape.size() == static_cast(nd)); + + simplified_src_strides.reserve(nd); + simplified_src_strides.insert(std::end(simplified_src_strides), + std::begin(src_strides), + std::end(src_strides)); + assert(simplified_src_strides.size() == static_cast(nd)); + + simplified_dst_strides.reserve(nd); + simplified_dst_strides.insert(std::end(simplified_dst_strides), + std::begin(dst_strides), + std::end(dst_strides)); + assert(simplified_dst_strides.size() == static_cast(nd)); + + int contracted_nd = simplify_iteration_two_strides( + nd, simplified_shape.data(), simplified_src_strides.data(), + simplified_dst_strides.data(), + src_offset, // modified by reference + dst_offset // modified by reference + ); + simplified_shape.resize(contracted_nd); + 
simplified_src_strides.resize(contracted_nd); + simplified_dst_strides.resize(contracted_nd); + + nd = contracted_nd; + } + else if (nd == 1) { + src_offset = 0; + dst_offset = 0; + // Populate vectors + simplified_shape.reserve(nd); + simplified_shape.push_back(shape[0]); + assert(simplified_shape.size() == static_cast(nd)); + + simplified_src_strides.reserve(nd); + simplified_dst_strides.reserve(nd); + + if (src_strides[0] < 0 && dst_strides[0] < 0) { + simplified_src_strides.push_back(-src_strides[0]); + simplified_dst_strides.push_back(-dst_strides[0]); + if (shape[0] > 1) { + src_offset += (shape[0] - 1) * src_strides[0]; + dst_offset += (shape[0] - 1) * dst_strides[0]; + } + } + else { + simplified_src_strides.push_back(src_strides[0]); + simplified_dst_strides.push_back(dst_strides[0]); + } + + assert(simplified_src_strides.size() == static_cast(nd)); + assert(simplified_dst_strides.size() == static_cast(nd)); + } +} + +void simplify_iteration_space_3( + int &nd, + const py::ssize_t *const &shape, + // src1 + std::vector const &src1_strides, + // src2 + std::vector const &src2_strides, + // dst + std::vector const &dst_strides, + // output + std::vector &simplified_shape, + std::vector &simplified_src1_strides, + std::vector &simplified_src2_strides, + std::vector &simplified_dst_strides, + py::ssize_t &src1_offset, + py::ssize_t &src2_offset, + py::ssize_t &dst_offset) +{ + if (nd > 1) { + // Simplify iteration space to reduce dimensionality + // and improve access pattern + simplified_shape.reserve(nd); + simplified_shape.insert(std::end(simplified_shape), shape, shape + nd); + assert(simplified_shape.size() == static_cast(nd)); + + simplified_src1_strides.reserve(nd); + simplified_src1_strides.insert(std::end(simplified_src1_strides), + std::begin(src1_strides), + std::end(src1_strides)); + assert(simplified_src1_strides.size() == static_cast(nd)); + + simplified_src2_strides.reserve(nd); + simplified_src2_strides.insert(std::end(simplified_src2_strides), + std::begin(src2_strides), + std::end(src2_strides)); + assert(simplified_src2_strides.size() == static_cast(nd)); + + simplified_dst_strides.reserve(nd); + simplified_dst_strides.insert(std::end(simplified_dst_strides), + std::begin(dst_strides), + std::end(dst_strides)); + assert(simplified_dst_strides.size() == static_cast(nd)); + + int contracted_nd = simplify_iteration_three_strides( + nd, simplified_shape.data(), simplified_src1_strides.data(), + simplified_src2_strides.data(), simplified_dst_strides.data(), + src1_offset, // modified by reference + src2_offset, // modified by reference + dst_offset // modified by reference + ); + simplified_shape.resize(contracted_nd); + simplified_src1_strides.resize(contracted_nd); + simplified_src2_strides.resize(contracted_nd); + simplified_dst_strides.resize(contracted_nd); + + nd = contracted_nd; + } + else if (nd == 1) { + src1_offset = 0; + src2_offset = 0; + dst_offset = 0; + // Populate vectors + simplified_shape.reserve(nd); + simplified_shape.push_back(shape[0]); + assert(simplified_shape.size() == static_cast(nd)); + + simplified_src1_strides.reserve(nd); + simplified_src2_strides.reserve(nd); + simplified_dst_strides.reserve(nd); + + if ((src1_strides[0] < 0) && (src2_strides[0] < 0) && + (dst_strides[0] < 0)) { + simplified_src1_strides.push_back(-src1_strides[0]); + simplified_src2_strides.push_back(-src2_strides[0]); + simplified_dst_strides.push_back(-dst_strides[0]); + if (shape[0] > 1) { + src1_offset += src1_strides[0] * (shape[0] - 1); + src2_offset += 
src2_strides[0] * (shape[0] - 1); + dst_offset += dst_strides[0] * (shape[0] - 1); + } + } + else { + simplified_src1_strides.push_back(src1_strides[0]); + simplified_src2_strides.push_back(src2_strides[0]); + simplified_dst_strides.push_back(dst_strides[0]); + } + + assert(simplified_src1_strides.size() == static_cast(nd)); + assert(simplified_src2_strides.size() == static_cast(nd)); + assert(simplified_dst_strides.size() == static_cast(nd)); + } +} +} // namespace dpnp::backend::ext::py_internal diff --git a/dpnp/backend/extensions/vm/abs.hpp b/dpnp/backend/extensions/vm/abs.hpp index bb5e55010b4f..a3e660483967 100644 --- a/dpnp/backend/extensions/vm/abs.hpp +++ b/dpnp/backend/extensions/vm/abs.hpp @@ -30,23 +30,23 @@ #include "common.hpp" #include "types_matrix.hpp" -namespace dpnp -{ -namespace backend -{ -namespace ext -{ -namespace vm +// dpctl tensor headers +#include "utils/type_dispatch.hpp" + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace dpnp::backend::ext::vm { template -sycl::event abs_contig_impl(sycl::queue exec_q, - const std::int64_t n, +sycl::event abs_contig_impl(sycl::queue &exec_q, + std::size_t in_n, const char *in_a, char *out_y, const std::vector &depends) { type_utils::validate_type_for_device(exec_q); + std::int64_t n = static_cast(in_n); const T *a = reinterpret_cast(in_a); using resTy = typename types::AbsOutputType::value_type; resTy *y = reinterpret_cast(out_y); @@ -73,7 +73,24 @@ struct AbsContigFactory } } }; -} // namespace vm -} // namespace ext -} // namespace backend -} // namespace dpnp + +template +struct AbsStridedFactory +{ + fnT get() + { + return nullptr; + } +}; + +template +struct AbsTypeMapFactory +{ + /*! @brief get typeid for output type of abs(T x) */ + std::enable_if_t::value, int> get() + { + using rT = typename types::AbsOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; +} // namespace dpnp::backend::ext::vm diff --git a/dpnp/backend/extensions/vm/common.hpp b/dpnp/backend/extensions/vm/common.hpp index b53b9b0881ca..c8c12db2e529 100644 --- a/dpnp/backend/extensions/vm/common.hpp +++ b/dpnp/backend/extensions/vm/common.hpp @@ -56,6 +56,25 @@ namespace ext { namespace vm { +typedef sycl::event (*unary_contig_impl_fn_ptr_t)( + sycl::queue &, + size_t, + const char *, + char *, + const std::vector &); + +typedef sycl::event (*unary_strided_impl_fn_ptr_t)( + sycl::queue &, + size_t, + int, + const ssize_t *, + const char *, + ssize_t, + char *, + ssize_t, + const std::vector &, + const std::vector &); + typedef sycl::event (*unary_impl_fn_ptr_t)(sycl::queue, const std::int64_t, const char *, diff --git a/dpnp/backend/extensions/vm/vm_py.cpp b/dpnp/backend/extensions/vm/vm_py.cpp index 74d2ae677943..6b8b4dbd16d3 100644 --- a/dpnp/backend/extensions/vm/vm_py.cpp +++ b/dpnp/backend/extensions/vm/vm_py.cpp @@ -68,13 +68,19 @@ #include "trunc.hpp" #include "types_matrix.hpp" +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../elementwise_functions/elementwise_functions.hpp" + namespace py = pybind11; +namespace py_int = dpnp::backend::ext::py_internal; namespace vm_ext = dpnp::backend::ext::vm; using vm_ext::binary_impl_fn_ptr_t; using vm_ext::unary_impl_fn_ptr_t; -static unary_impl_fn_ptr_t abs_dispatch_vector[dpctl_td_ns::num_types]; +// static unary_impl_fn_ptr_t abs_dispatch_vector[dpctl_td_ns::num_types]; static unary_impl_fn_ptr_t 
acos_dispatch_vector[dpctl_td_ns::num_types]; static unary_impl_fn_ptr_t acosh_dispatch_vector[dpctl_td_ns::num_types]; static binary_impl_fn_ptr_t add_dispatch_vector[dpctl_td_ns::num_types]; @@ -110,6 +116,16 @@ static unary_impl_fn_ptr_t tan_dispatch_vector[dpctl_td_ns::num_types]; static unary_impl_fn_ptr_t tanh_dispatch_vector[dpctl_td_ns::num_types]; static unary_impl_fn_ptr_t trunc_dispatch_vector[dpctl_td_ns::num_types]; +using vm_ext::unary_contig_impl_fn_ptr_t; +using vm_ext::unary_strided_impl_fn_ptr_t; + +static unary_contig_impl_fn_ptr_t + abs_contig_dispatch_vector[dpctl_td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + abs_strided_dispatch_vector[dpctl_td_ns::num_types]; + +static int abs_output_typeid_vector[td_ns::num_types]; + PYBIND11_MODULE(_vm_impl, m) { using arrayT = dpctl::tensor::usm_ndarray; @@ -117,14 +133,22 @@ PYBIND11_MODULE(_vm_impl, m) // UnaryUfunc: ==== Abs(x) ==== { - vm_ext::init_ufunc_dispatch_vector( - abs_dispatch_vector); + abs_contig_dispatch_vector); + + vm_ext::init_ufunc_dispatch_vector( + abs_strided_dispatch_vector); + + vm_ext::init_ufunc_dispatch_vector( + abs_output_typeid_vector); auto abs_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, const event_vecT &depends = {}) { - return vm_ext::unary_ufunc(exec_q, src, dst, depends, - abs_dispatch_vector); + return py_int::py_unary_ufunc( + src, dst, exec_q, depends, abs_output_typeid_vector, + abs_contig_dispatch_vector, abs_strided_dispatch_vector); }; m.def("_abs", abs_pyapi, "Call `abs` function from OneMKL VM library to compute " @@ -135,7 +159,7 @@ PYBIND11_MODULE(_vm_impl, m) auto abs_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst) { return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, - abs_dispatch_vector); + abs_contig_dispatch_vector); }; m.def("_mkl_abs_to_call", abs_need_to_call_pyapi, "Check input arguments to answer if `abs` function from " From a61d4e82481675c4b011b3762d7a5d88d28a4800 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Tue, 4 Jun 2024 17:19:40 +0200 Subject: [PATCH 02/35] PoC to decouple abs implementation to separate source file --- .../elementwise_functions.hpp | 12 +- .../elementwise_functions_type_utils.hpp | 8 +- .../simplify_iteration_space.hpp | 6 +- dpnp/backend/extensions/vm/CMakeLists.txt | 7 +- dpnp/backend/extensions/vm/abs.cpp | 179 ++++++++++++++++++ dpnp/backend/extensions/vm/abs.hpp | 72 +------ dpnp/backend/extensions/vm/common.hpp | 35 ++-- dpnp/backend/extensions/vm/types_matrix.hpp | 17 -- dpnp/backend/extensions/vm/vm_py.cpp | 54 +----- 9 files changed, 230 insertions(+), 160 deletions(-) create mode 100644 dpnp/backend/extensions/vm/abs.cpp diff --git a/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp b/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp index 5b2dbd74fd8c..01013d10f5df 100644 --- a/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp +++ b/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp @@ -25,16 +25,20 @@ #pragma once -#include "dpctl4pybind11.hpp" #include +#include "dpctl4pybind11.hpp" +#include +#include +#include + #include "elementwise_functions_type_utils.hpp" #include "simplify_iteration_space.hpp" // dpctl tensor headers #include "kernels/alignment.hpp" // #include "kernels/dpctl_tensor_types.hpp" -// #include "utils/memory_overlap.hpp" +#include "utils/memory_overlap.hpp" #include "utils/offset_utils.hpp" #include "utils/output_validation.hpp" #include "utils/type_dispatch.hpp" @@ -44,7 +48,7 
@@ namespace td_ns = dpctl::tensor::type_dispatch; static_assert(std::is_same_v); -namespace dpnp::backend::ext::py_internal +namespace dpnp::extensions::py_internal { using dpctl::tensor::kernels::alignment_utils::is_aligned; @@ -817,4 +821,4 @@ std::pair strided_fn_ev); } -} // namespace dpnp::backend::ext::py_internal +} // namespace dpnp::extensions::py_internal diff --git a/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.hpp b/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.hpp index f3e43b34ba17..db5a26fb24cc 100644 --- a/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.hpp +++ b/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.hpp @@ -26,7 +26,9 @@ #pragma once #include "dpctl4pybind11.hpp" -#include +#include +#include +#include // dpctl tensor headers #include "utils/type_dispatch.hpp" @@ -34,7 +36,7 @@ namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; -namespace dpnp::backend::ext::py_internal::type_utils +namespace dpnp::extensions::py_internal::type_utils { /*! @brief Produce dtype from a type number */ @@ -85,4 +87,4 @@ int _result_typeid(int arg_typeid, const int *fn_output_id) return fn_output_id[arg_typeid]; } -} // namespace dpnp::backend::ext::py_internal::type_utils +} // namespace dpnp::extensions::py_internal::type_utils diff --git a/dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.hpp b/dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.hpp index ae86d94924bd..aa47831426c8 100644 --- a/dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.hpp +++ b/dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.hpp @@ -27,9 +27,11 @@ #include +#include "dpctl4pybind11.hpp" #include +#include -namespace dpnp::backend::ext::py_internal +namespace dpnp::extensions::py_internal { namespace py = pybind11; @@ -415,4 +417,4 @@ void simplify_iteration_space_3( assert(simplified_dst_strides.size() == static_cast(nd)); } } -} // namespace dpnp::backend::ext::py_internal +} // namespace dpnp::extensions::py_internal diff --git a/dpnp/backend/extensions/vm/CMakeLists.txt b/dpnp/backend/extensions/vm/CMakeLists.txt index 1fa895f4e696..3c30d27cc9ab 100644 --- a/dpnp/backend/extensions/vm/CMakeLists.txt +++ b/dpnp/backend/extensions/vm/CMakeLists.txt @@ -23,12 +23,17 @@ # THE POSSIBILITY OF SUCH DAMAGE. # ***************************************************************************** +set(_elementwise_sources + ${CMAKE_CURRENT_SOURCE_DIR}/abs.cpp +) -set(python_module_name _vm_impl) set(_module_src ${CMAKE_CURRENT_SOURCE_DIR}/vm_py.cpp + ${_elementwise_sources} ) +set(python_module_name _vm_impl) + pybind11_add_module(${python_module_name} MODULE ${_module_src}) add_sycl_to_target(TARGET ${python_module_name} SOURCES ${_module_src}) diff --git a/dpnp/backend/extensions/vm/abs.cpp b/dpnp/backend/extensions/vm/abs.cpp new file mode 100644 index 000000000000..44f594850b8f --- /dev/null +++ b/dpnp/backend/extensions/vm/abs.cpp @@ -0,0 +1,179 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include +#include + +#include "dpctl4pybind11.hpp" +#include +#include + +#include "abs.hpp" +#include "common.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::vm +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; +namespace vm_ext = dpnp::backend::ext::vm; + +namespace impl +{ +// OneMKL namespace with VM functions +namespace mkl_vm = oneapi::mkl::vm; + +/** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::abs function. + * + * @tparam T Type of input vector `a` and of result vector `y`. 
+ */ +template +struct AbsOutputType +{ + using value_type = typename std::disjunction< + td_ns::TypeMapResultEntry, double>, + td_ns::TypeMapResultEntry, float>, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +static sycl::event abs_contig_impl(sycl::queue &exec_q, + std::size_t in_n, + const char *in_a, + char *out_y, + const std::vector &depends) +{ + tu_ns::validate_type_for_device(exec_q); + + std::int64_t n = static_cast(in_n); + const T *a = reinterpret_cast(in_a); + + using resTy = typename AbsOutputType::value_type; + resTy *y = reinterpret_cast(out_y); + + return mkl_vm::abs(exec_q, + n, // number of elements to be calculated + a, // pointer `a` containing input vector of size n + y, // pointer `y` to the output vector of size n + depends); +} + +template +struct AbsContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + return nullptr; + } + else { + return abs_contig_impl; + } + } +}; + +template +struct AbsTypeMapFactory +{ + std::enable_if_t::value, int> get() + { + using rT = typename AbsOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +static int abs_output_typeid_vector[td_ns::num_types]; +static unary_contig_impl_fn_ptr_t abs_contig_dispatch_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + abs_strided_dispatch_vector[td_ns::num_types]; + +static void populate_abs_dispatch_vectors(void) +{ + vm_ext::init_ufunc_dispatch_vector( + abs_output_typeid_vector); + vm_ext::init_ufunc_dispatch_vector( + abs_contig_dispatch_vector); + // no support of strided implementation in OneMKL + vm_ext::init_ufunc_dispatch_vector( + abs_strided_dispatch_vector); +}; +} // namespace impl + +void init_abs(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + impl::populate_abs_dispatch_vectors(); + using impl::abs_contig_dispatch_vector; + using impl::abs_output_typeid_vector; + using impl::abs_strided_dispatch_vector; + + auto abs_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, + const event_vecT &depends = {}) { + return py_int::py_unary_ufunc( + src, dst, exec_q, depends, abs_output_typeid_vector, + abs_contig_dispatch_vector, abs_strided_dispatch_vector); + }; + m.def("_abs", abs_pyapi, + "Call `abs` function from OneMKL VM library to compute " + "the absolute value of vector elements", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), + py::arg("depends") = py::list()); + + auto abs_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, + arrayT dst) { + return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, + abs_contig_dispatch_vector); + }; + m.def("_mkl_abs_to_call", abs_need_to_call_pyapi, + "Check input arguments to answer if `abs` function from " + "OneMKL VM library can be used", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); +} + +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/abs.hpp b/dpnp/backend/extensions/vm/abs.hpp index a3e660483967..9a76a89b97c3 100644 --- a/dpnp/backend/extensions/vm/abs.hpp +++ b/dpnp/backend/extensions/vm/abs.hpp @@ -25,72 +25,12 @@ #pragma once -#include +#include +#include -#include "common.hpp" -#include "types_matrix.hpp" +namespace py = pybind11; -// dpctl tensor headers -#include "utils/type_dispatch.hpp" - -namespace td_ns = dpctl::tensor::type_dispatch; - -namespace dpnp::backend::ext::vm -{ -template 
-sycl::event abs_contig_impl(sycl::queue &exec_q, - std::size_t in_n, - const char *in_a, - char *out_y, - const std::vector &depends) -{ - type_utils::validate_type_for_device(exec_q); - - std::int64_t n = static_cast(in_n); - const T *a = reinterpret_cast(in_a); - using resTy = typename types::AbsOutputType::value_type; - resTy *y = reinterpret_cast(out_y); - - return mkl_vm::abs(exec_q, - n, // number of elements to be calculated - a, // pointer `a` containing input vector of size n - y, // pointer `y` to the output vector of size n - depends); -} - -template -struct AbsContigFactory -{ - fnT get() - { - if constexpr (std::is_same_v< - typename types::AbsOutputType::value_type, void>) - { - return nullptr; - } - else { - return abs_contig_impl; - } - } -}; - -template -struct AbsStridedFactory -{ - fnT get() - { - return nullptr; - } -}; - -template -struct AbsTypeMapFactory +namespace dpnp::extensions::vm { - /*! @brief get typeid for output type of abs(T x) */ - std::enable_if_t::value, int> get() - { - using rT = typename types::AbsOutputType::value_type; - return td_ns::GetTypeid{}.get(); - } -}; -} // namespace dpnp::backend::ext::vm +void init_abs(py::module_ m); +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/common.hpp b/dpnp/backend/extensions/vm/common.hpp index c8c12db2e529..a30552da891c 100644 --- a/dpnp/backend/extensions/vm/common.hpp +++ b/dpnp/backend/extensions/vm/common.hpp @@ -48,13 +48,7 @@ namespace mkl_vm = oneapi::mkl::vm; // dpctl namespace for type utils namespace type_utils = dpctl::tensor::type_utils; -namespace dpnp -{ -namespace backend -{ -namespace ext -{ -namespace vm +namespace dpnp::backend::ext::vm { typedef sycl::event (*unary_contig_impl_fn_ptr_t)( sycl::queue &, @@ -459,17 +453,28 @@ bool need_to_call_binary_ufunc(sycl::queue exec_q, return true; } +/** + * @brief A factory with no support provided to the implementation. + * + * @tparam fnT Type of function pointer to dispatch the implementation + * @tparam T Type of input vector + */ +template +struct NoSupportFactory +{ + fnT get() + { + return nullptr; + } +}; + template - typename factoryT> + typename factoryT, + int _num_types = dpctl_td_ns::num_types> void init_ufunc_dispatch_vector(dispatchT dispatch_vector[]) { - dpctl_td_ns::DispatchVectorBuilder - contig; + dpctl_td_ns::DispatchVectorBuilder contig; contig.populate_dispatch_vector(dispatch_vector); } -} // namespace vm -} // namespace ext -} // namespace backend -} // namespace dpnp +} // namespace dpnp::backend::ext::vm diff --git a/dpnp/backend/extensions/vm/types_matrix.hpp b/dpnp/backend/extensions/vm/types_matrix.hpp index 5b4ccb8fdf67..656fc4b99c48 100644 --- a/dpnp/backend/extensions/vm/types_matrix.hpp +++ b/dpnp/backend/extensions/vm/types_matrix.hpp @@ -43,23 +43,6 @@ namespace vm { namespace types { -/** - * @brief A factory to define pairs of supported types for which - * MKL VM library provides support in oneapi::mkl::vm::abs function. - * - * @tparam T Type of input vector `a` and of result vector `y`. - */ -template -struct AbsOutputType -{ - using value_type = typename std::disjunction< - dpctl_td_ns::TypeMapResultEntry, double>, - dpctl_td_ns::TypeMapResultEntry, float>, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::DefaultResultEntry>::result_type; -}; - /** * @brief A factory to define pairs of supported types for which * MKL VM library provides support in oneapi::mkl::vm::acos function. 
diff --git a/dpnp/backend/extensions/vm/vm_py.cpp b/dpnp/backend/extensions/vm/vm_py.cpp
index 6b8b4dbd16d3..c94fab5d262c 100644
--- a/dpnp/backend/extensions/vm/vm_py.cpp
+++ b/dpnp/backend/extensions/vm/vm_py.cpp
@@ -68,19 +68,13 @@
 #include "trunc.hpp"
 #include "types_matrix.hpp"
 
-// include a local copy of elementwise common header from dpctl tensor:
-// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp
-// TODO: replace by including dpctl header once available
-#include "../elementwise_functions/elementwise_functions.hpp"
-
 namespace py = pybind11;
-namespace py_int = dpnp::backend::ext::py_internal;
 namespace vm_ext = dpnp::backend::ext::vm;
+namespace vm_ns = dpnp::extensions::vm;
 
 using vm_ext::binary_impl_fn_ptr_t;
 using vm_ext::unary_impl_fn_ptr_t;
 
-// static unary_impl_fn_ptr_t abs_dispatch_vector[dpctl_td_ns::num_types];
 static unary_impl_fn_ptr_t acos_dispatch_vector[dpctl_td_ns::num_types];
 static unary_impl_fn_ptr_t acosh_dispatch_vector[dpctl_td_ns::num_types];
 static binary_impl_fn_ptr_t add_dispatch_vector[dpctl_td_ns::num_types];
@@ -116,56 +110,12 @@ static unary_impl_fn_ptr_t tan_dispatch_vector[dpctl_td_ns::num_types];
 static unary_impl_fn_ptr_t tanh_dispatch_vector[dpctl_td_ns::num_types];
 static unary_impl_fn_ptr_t trunc_dispatch_vector[dpctl_td_ns::num_types];
 
-using vm_ext::unary_contig_impl_fn_ptr_t;
-using vm_ext::unary_strided_impl_fn_ptr_t;
-
-static unary_contig_impl_fn_ptr_t
-    abs_contig_dispatch_vector[dpctl_td_ns::num_types];
-static unary_strided_impl_fn_ptr_t
-    abs_strided_dispatch_vector[dpctl_td_ns::num_types];
-
-static int abs_output_typeid_vector[td_ns::num_types];
-
 PYBIND11_MODULE(_vm_impl, m)
 {
     using arrayT = dpctl::tensor::usm_ndarray;
     using event_vecT = std::vector;
 
-    // UnaryUfunc: ==== Abs(x) ====
-    {
-        vm_ext::init_ufunc_dispatch_vector(
-            abs_contig_dispatch_vector);
-
-        vm_ext::init_ufunc_dispatch_vector(
-            abs_strided_dispatch_vector);
-
-        vm_ext::init_ufunc_dispatch_vector(
-            abs_output_typeid_vector);
-
-        auto abs_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst,
-                             const event_vecT &depends = {}) {
-            return py_int::py_unary_ufunc(
-                src, dst, exec_q, depends, abs_output_typeid_vector,
-                abs_contig_dispatch_vector, abs_strided_dispatch_vector);
-        };
-        m.def("_abs", abs_pyapi,
-              "Call `abs` function from OneMKL VM library to compute "
-              "the absolute value of vector elements",
-              py::arg("sycl_queue"), py::arg("src"), py::arg("dst"),
-              py::arg("depends") = py::list());
-
-        auto abs_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src,
-                                          arrayT dst) {
-            return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst,
-                                                    abs_contig_dispatch_vector);
-        };
-        m.def("_mkl_abs_to_call", abs_need_to_call_pyapi,
-              "Check input arguments to answer if `abs` function from "
-              "OneMKL VM library can be used",
-              py::arg("sycl_queue"), py::arg("src"), py::arg("dst"));
-    }
+    vm_ns::init_abs(m);
 
     // UnaryUfunc: ==== Acos(x) ====
     {

From ed5748308b3694cb33374d55119aa67fabf2f14c Mon Sep 17 00:00:00 2001
From: Anton Volkov 
Date: Tue, 4 Jun 2024 20:22:13 +0200
Subject: [PATCH 03/35] Reuse typedef for function pointer from dpctl.tensor

---
 dpnp/backend/extensions/vm/common.hpp | 19 -------------------
 1 file changed, 19 deletions(-)

diff --git a/dpnp/backend/extensions/vm/common.hpp b/dpnp/backend/extensions/vm/common.hpp
index a30552da891c..f6a51f3ff316 100644
--- a/dpnp/backend/extensions/vm/common.hpp
+++ b/dpnp/backend/extensions/vm/common.hpp
@@ -50,25 +50,6 @@ namespace type_utils = dpctl::tensor::type_utils;
 
 namespace 
dpnp::backend::ext::vm { -typedef sycl::event (*unary_contig_impl_fn_ptr_t)( - sycl::queue &, - size_t, - const char *, - char *, - const std::vector &); - -typedef sycl::event (*unary_strided_impl_fn_ptr_t)( - sycl::queue &, - size_t, - int, - const ssize_t *, - const char *, - ssize_t, - char *, - ssize_t, - const std::vector &, - const std::vector &); - typedef sycl::event (*unary_impl_fn_ptr_t)(sycl::queue, const std::int64_t, const char *, From 71d745d54153ad2a472f4f6d53abd121f61073b5 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 5 Jun 2024 12:12:51 +0200 Subject: [PATCH 04/35] Define populating vectors by a separate macro --- dpnp/backend/extensions/vm/abs.cpp | 65 ++++++--------------------- dpnp/backend/extensions/vm/common.hpp | 43 ++++++++++++++++++ 2 files changed, 56 insertions(+), 52 deletions(-) diff --git a/dpnp/backend/extensions/vm/abs.cpp b/dpnp/backend/extensions/vm/abs.cpp index 44f594850b8f..c3e5a13e8629 100644 --- a/dpnp/backend/extensions/vm/abs.cpp +++ b/dpnp/backend/extensions/vm/abs.cpp @@ -27,8 +27,6 @@ #include #include "dpctl4pybind11.hpp" -#include -#include #include "abs.hpp" #include "common.hpp" @@ -64,7 +62,7 @@ namespace mkl_vm = oneapi::mkl::vm; * @tparam T Type of input vector `a` and of result vector `y`. */ template -struct AbsOutputType +struct OutputType { using value_type = typename std::disjunction< td_ns::TypeMapResultEntry, double>, @@ -86,7 +84,7 @@ static sycl::event abs_contig_impl(sycl::queue &exec_q, std::int64_t n = static_cast(in_n); const T *a = reinterpret_cast(in_a); - using resTy = typename AbsOutputType::value_type; + using resTy = typename OutputType::value_type; resTy *y = reinterpret_cast(out_y); return mkl_vm::abs(exec_q, @@ -96,51 +94,14 @@ static sycl::event abs_contig_impl(sycl::queue &exec_q, depends); } -template -struct AbsContigFactory -{ - fnT get() - { - if constexpr (std::is_same_v::value_type, - void>) { - return nullptr; - } - else { - return abs_contig_impl; - } - } -}; - -template -struct AbsTypeMapFactory -{ - std::enable_if_t::value, int> get() - { - using rT = typename AbsOutputType::value_type; - return td_ns::GetTypeid{}.get(); - } -}; - using ew_cmn_ns::unary_contig_impl_fn_ptr_t; using ew_cmn_ns::unary_strided_impl_fn_ptr_t; -static int abs_output_typeid_vector[td_ns::num_types]; -static unary_contig_impl_fn_ptr_t abs_contig_dispatch_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - abs_strided_dispatch_vector[td_ns::num_types]; +static int output_typeid_vector[td_ns::num_types]; +static unary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t strided_dispatch_vector[td_ns::num_types]; -static void populate_abs_dispatch_vectors(void) -{ - vm_ext::init_ufunc_dispatch_vector( - abs_output_typeid_vector); - vm_ext::init_ufunc_dispatch_vector( - abs_contig_dispatch_vector); - // no support of strided implementation in OneMKL - vm_ext::init_ufunc_dispatch_vector( - abs_strided_dispatch_vector); -}; +MACRO_POPULATE_DISPATCH_VECTORS(abs); } // namespace impl void init_abs(py::module_ m) @@ -148,16 +109,16 @@ void init_abs(py::module_ m) using arrayT = dpctl::tensor::usm_ndarray; using event_vecT = std::vector; - impl::populate_abs_dispatch_vectors(); - using impl::abs_contig_dispatch_vector; - using impl::abs_output_typeid_vector; - using impl::abs_strided_dispatch_vector; + impl::populate_dispatch_vectors(); + using impl::contig_dispatch_vector; + using impl::output_typeid_vector; + using impl::strided_dispatch_vector; auto abs_pyapi = 
[&](sycl::queue exec_q, arrayT src, arrayT dst, const event_vecT &depends = {}) { return py_int::py_unary_ufunc( - src, dst, exec_q, depends, abs_output_typeid_vector, - abs_contig_dispatch_vector, abs_strided_dispatch_vector); + src, dst, exec_q, depends, output_typeid_vector, + contig_dispatch_vector, strided_dispatch_vector); }; m.def("_abs", abs_pyapi, "Call `abs` function from OneMKL VM library to compute " @@ -168,7 +129,7 @@ void init_abs(py::module_ m) auto abs_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst) { return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, - abs_contig_dispatch_vector); + contig_dispatch_vector); }; m.def("_mkl_abs_to_call", abs_need_to_call_pyapi, "Check input arguments to answer if `abs` function from " diff --git a/dpnp/backend/extensions/vm/common.hpp b/dpnp/backend/extensions/vm/common.hpp index f6a51f3ff316..dedbb83a41f5 100644 --- a/dpnp/backend/extensions/vm/common.hpp +++ b/dpnp/backend/extensions/vm/common.hpp @@ -449,6 +449,49 @@ struct NoSupportFactory } }; +/** + * @brief A macro used to define factories and a populating function to dispatch + * to a callback with proper OneMKL function within VM extension scope. + */ +#define MACRO_POPULATE_DISPATCH_VECTORS(__name__) \ + template \ + struct ContigFactory \ + { \ + fnT get() \ + { \ + if constexpr (std::is_same_v::value_type, \ + void>) { \ + return nullptr; \ + } \ + else { \ + return __name__##_contig_impl; \ + } \ + } \ + }; \ + \ + template \ + struct TypeMapFactory \ + { \ + std::enable_if_t::value, int> get() \ + { \ + using rT = typename OutputType::value_type; \ + return td_ns::GetTypeid{}.get(); \ + } \ + }; \ + \ + static void populate_dispatch_vectors(void) \ + { \ + vm_ext::init_ufunc_dispatch_vector( \ + output_typeid_vector); \ + vm_ext::init_ufunc_dispatch_vector( \ + contig_dispatch_vector); \ + /* no support of strided implementation in OneMKL */ \ + vm_ext::init_ufunc_dispatch_vector( \ + strided_dispatch_vector); \ + }; + template typename factoryT, From c2ea834a14ed3d1bdd005c53fd9799598638fe52 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 5 Jun 2024 13:10:34 +0200 Subject: [PATCH 05/35] Move implementation of utility functions from headers to source to resolve link issues --- .../elementwise_functions_type_utils.cpp | 86 ++++ .../elementwise_functions_type_utils.hpp | 47 +- .../simplify_iteration_space.cpp | 417 ++++++++++++++++++ .../simplify_iteration_space.hpp | 413 ++--------------- dpnp/backend/extensions/vm/CMakeLists.txt | 3 + 5 files changed, 535 insertions(+), 431 deletions(-) create mode 100644 dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.cpp create mode 100644 dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.cpp diff --git a/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.cpp b/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.cpp new file mode 100644 index 000000000000..edb6b6118804 --- /dev/null +++ b/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.cpp @@ -0,0 +1,86 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include "dpctl4pybind11.hpp" +#include +#include +#include + +#include "elementwise_functions_type_utils.hpp" + +// dpctl tensor headers +#include "utils/type_dispatch.hpp" + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace dpnp::extensions::py_internal::type_utils +{ +py::dtype _dtype_from_typenum(td_ns::typenum_t dst_typenum_t) +{ + switch (dst_typenum_t) { + case td_ns::typenum_t::BOOL: + return py::dtype("?"); + case td_ns::typenum_t::INT8: + return py::dtype("i1"); + case td_ns::typenum_t::UINT8: + return py::dtype("u1"); + case td_ns::typenum_t::INT16: + return py::dtype("i2"); + case td_ns::typenum_t::UINT16: + return py::dtype("u2"); + case td_ns::typenum_t::INT32: + return py::dtype("i4"); + case td_ns::typenum_t::UINT32: + return py::dtype("u4"); + case td_ns::typenum_t::INT64: + return py::dtype("i8"); + case td_ns::typenum_t::UINT64: + return py::dtype("u8"); + case td_ns::typenum_t::HALF: + return py::dtype("f2"); + case td_ns::typenum_t::FLOAT: + return py::dtype("f4"); + case td_ns::typenum_t::DOUBLE: + return py::dtype("f8"); + case td_ns::typenum_t::CFLOAT: + return py::dtype("c8"); + case td_ns::typenum_t::CDOUBLE: + return py::dtype("c16"); + default: + throw py::value_error("Unrecognized dst_typeid"); + } +} + +int _result_typeid(int arg_typeid, const int *fn_output_id) +{ + if (arg_typeid < 0 || arg_typeid >= td_ns::num_types) { + throw py::value_error("Input typeid " + std::to_string(arg_typeid) + + " is outside of expected bounds."); + } + + return fn_output_id[arg_typeid]; +} +} // namespace dpnp::extensions::py_internal::type_utils diff --git a/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.hpp b/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.hpp index db5a26fb24cc..ede4ea35fad7 100644 --- a/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.hpp +++ b/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.hpp @@ -38,53 +38,10 @@ namespace td_ns = dpctl::tensor::type_dispatch; namespace dpnp::extensions::py_internal::type_utils { - /*! 
@brief Produce dtype from a type number */ -py::dtype _dtype_from_typenum(td_ns::typenum_t dst_typenum_t) -{ - switch (dst_typenum_t) { - case td_ns::typenum_t::BOOL: - return py::dtype("?"); - case td_ns::typenum_t::INT8: - return py::dtype("i1"); - case td_ns::typenum_t::UINT8: - return py::dtype("u1"); - case td_ns::typenum_t::INT16: - return py::dtype("i2"); - case td_ns::typenum_t::UINT16: - return py::dtype("u2"); - case td_ns::typenum_t::INT32: - return py::dtype("i4"); - case td_ns::typenum_t::UINT32: - return py::dtype("u4"); - case td_ns::typenum_t::INT64: - return py::dtype("i8"); - case td_ns::typenum_t::UINT64: - return py::dtype("u8"); - case td_ns::typenum_t::HALF: - return py::dtype("f2"); - case td_ns::typenum_t::FLOAT: - return py::dtype("f4"); - case td_ns::typenum_t::DOUBLE: - return py::dtype("f8"); - case td_ns::typenum_t::CFLOAT: - return py::dtype("c8"); - case td_ns::typenum_t::CDOUBLE: - return py::dtype("c16"); - default: - throw py::value_error("Unrecognized dst_typeid"); - } -} +extern py::dtype _dtype_from_typenum(td_ns::typenum_t); /*! @brief Lookup typeid of the result from typeid of * argument and the mapping table */ -int _result_typeid(int arg_typeid, const int *fn_output_id) -{ - if (arg_typeid < 0 || arg_typeid >= td_ns::num_types) { - throw py::value_error("Input typeid " + std::to_string(arg_typeid) + - " is outside of expected bounds."); - } - - return fn_output_id[arg_typeid]; -} +extern int _result_typeid(int, const int *); } // namespace dpnp::extensions::py_internal::type_utils diff --git a/dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.cpp b/dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.cpp new file mode 100644 index 000000000000..bb030fd80dd1 --- /dev/null +++ b/dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.cpp @@ -0,0 +1,417 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#include "simplify_iteration_space.hpp" +#include "dpctl4pybind11.hpp" +#include "utils/strided_iters.hpp" +#include +#include + +namespace dpnp::extensions::py_internal +{ +namespace py = pybind11; + +template +int simplify_iteration_two_strides(const int nd, + ShapeTy *shape, + StridesTy *strides1, + StridesTy *strides2, + StridesTy &disp1, + StridesTy &disp2) +{ + disp1 = StridesTy(0); + disp2 = StridesTy(0); + if (nd < 2) + return nd; + + std::vector pos(nd); + std::iota(pos.begin(), pos.end(), 0); + + std::stable_sort( + pos.begin(), pos.end(), [&strides1, &strides2, &shape](int i1, int i2) { + auto abs_str1_i1 = + (strides1[i1] < 0) ? -strides1[i1] : strides1[i1]; + auto abs_str1_i2 = + (strides1[i2] < 0) ? -strides1[i2] : strides1[i2]; + auto abs_str2_i1 = + (strides2[i1] < 0) ? -strides2[i1] : strides2[i1]; + auto abs_str2_i2 = + (strides2[i2] < 0) ? -strides2[i2] : strides2[i2]; + return (abs_str2_i1 > abs_str2_i2) || + (abs_str2_i1 == abs_str2_i2 && + (abs_str1_i1 > abs_str1_i2 || + (abs_str1_i1 == abs_str1_i2 && shape[i1] > shape[i2]))); + }); + + std::vector shape_w; + std::vector strides1_w; + std::vector strides2_w; + + bool contractable = true; + for (int i = 0; i < nd; ++i) { + auto p = pos[i]; + auto sh_p = shape[p]; + auto str1_p = strides1[p]; + auto str2_p = strides2[p]; + shape_w.push_back(sh_p); + if (str1_p <= 0 && str2_p <= 0 && std::min(str1_p, str2_p) < 0) { + disp1 += str1_p * (sh_p - 1); + str1_p = -str1_p; + disp2 += str2_p * (sh_p - 1); + str2_p = -str2_p; + } + if (str1_p < 0 || str2_p < 0) { + contractable = false; + } + strides1_w.push_back(str1_p); + strides2_w.push_back(str2_p); + } + + int nd_ = nd; + while (contractable) { + bool changed = false; + for (int i = 0; i + 1 < nd_; ++i) { + StridesTy str1 = strides1_w[i + 1]; + StridesTy str2 = strides2_w[i + 1]; + StridesTy jump1 = strides1_w[i] - (shape_w[i + 1] - 1) * str1; + StridesTy jump2 = strides2_w[i] - (shape_w[i + 1] - 1) * str2; + + if (jump1 == str1 && jump2 == str2) { + changed = true; + shape_w[i] *= shape_w[i + 1]; + for (int j = i; j < nd_; ++j) { + strides1_w[j] = strides1_w[j + 1]; + } + for (int j = i; j < nd_; ++j) { + strides2_w[j] = strides2_w[j + 1]; + } + for (int j = i + 1; j + 1 < nd_; ++j) { + shape_w[j] = shape_w[j + 1]; + } + --nd_; + break; + } + } + if (!changed) + break; + } + for (int i = 0; i < nd_; ++i) { + shape[i] = shape_w[i]; + } + for (int i = 0; i < nd_; ++i) { + strides1[i] = strides1_w[i]; + } + for (int i = 0; i < nd_; ++i) { + strides2[i] = strides2_w[i]; + } + + return nd_; +} + +template +int simplify_iteration_three_strides(const int nd, + ShapeTy *shape, + StridesTy *strides1, + StridesTy *strides2, + StridesTy *strides3, + StridesTy &disp1, + StridesTy &disp2, + StridesTy &disp3) +{ + disp1 = StridesTy(0); + disp2 = StridesTy(0); + if (nd < 2) + return nd; + + std::vector pos(nd); + std::iota(pos.begin(), pos.end(), 0); + + std::stable_sort(pos.begin(), pos.end(), + [&strides1, &strides2, &strides3, &shape](int i1, int i2) { + auto abs_str1_i1 = + (strides1[i1] < 0) ? -strides1[i1] : strides1[i1]; + auto abs_str1_i2 = + (strides1[i2] < 0) ? -strides1[i2] : strides1[i2]; + auto abs_str2_i1 = + (strides2[i1] < 0) ? -strides2[i1] : strides2[i1]; + auto abs_str2_i2 = + (strides2[i2] < 0) ? -strides2[i2] : strides2[i2]; + auto abs_str3_i1 = + (strides3[i1] < 0) ? -strides3[i1] : strides3[i1]; + auto abs_str3_i2 = + (strides3[i2] < 0) ? 
-strides3[i2] : strides3[i2]; + return (abs_str3_i1 > abs_str3_i2) || + ((abs_str3_i1 == abs_str3_i2) && + ((abs_str2_i1 > abs_str2_i2) || + ((abs_str2_i1 == abs_str2_i2) && + ((abs_str1_i1 > abs_str1_i2) || + ((abs_str1_i1 == abs_str1_i2) && + (shape[i1] > shape[i2])))))); + }); + + std::vector shape_w; + std::vector strides1_w; + std::vector strides2_w; + std::vector strides3_w; + + bool contractable = true; + for (int i = 0; i < nd; ++i) { + auto p = pos[i]; + auto sh_p = shape[p]; + auto str1_p = strides1[p]; + auto str2_p = strides2[p]; + auto str3_p = strides3[p]; + shape_w.push_back(sh_p); + if (str1_p <= 0 && str2_p <= 0 && str3_p <= 0 && + std::min({str1_p, str2_p, str3_p}) < 0) + { + disp1 += str1_p * (sh_p - 1); + str1_p = -str1_p; + disp2 += str2_p * (sh_p - 1); + str2_p = -str2_p; + disp3 += str3_p * (sh_p - 1); + str3_p = -str3_p; + } + if (str1_p < 0 || str2_p < 0 || str3_p < 0) { + contractable = false; + } + strides1_w.push_back(str1_p); + strides2_w.push_back(str2_p); + strides3_w.push_back(str3_p); + } + int nd_ = nd; + while (contractable) { + bool changed = false; + for (int i = 0; i + 1 < nd_; ++i) { + StridesTy str1 = strides1_w[i + 1]; + StridesTy str2 = strides2_w[i + 1]; + StridesTy str3 = strides3_w[i + 1]; + StridesTy jump1 = strides1_w[i] - (shape_w[i + 1] - 1) * str1; + StridesTy jump2 = strides2_w[i] - (shape_w[i + 1] - 1) * str2; + StridesTy jump3 = strides3_w[i] - (shape_w[i + 1] - 1) * str3; + + if (jump1 == str1 && jump2 == str2 && jump3 == str3) { + changed = true; + shape_w[i] *= shape_w[i + 1]; + for (int j = i; j < nd_; ++j) { + strides1_w[j] = strides1_w[j + 1]; + } + for (int j = i; j < nd_; ++j) { + strides2_w[j] = strides2_w[j + 1]; + } + for (int j = i; j < nd_; ++j) { + strides3_w[j] = strides3_w[j + 1]; + } + for (int j = i + 1; j + 1 < nd_; ++j) { + shape_w[j] = shape_w[j + 1]; + } + --nd_; + break; + } + } + if (!changed) + break; + } + for (int i = 0; i < nd_; ++i) { + shape[i] = shape_w[i]; + } + for (int i = 0; i < nd_; ++i) { + strides1[i] = strides1_w[i]; + } + for (int i = 0; i < nd_; ++i) { + strides2[i] = strides2_w[i]; + } + for (int i = 0; i < nd_; ++i) { + strides3[i] = strides3_w[i]; + } + + return nd_; +} + +void simplify_iteration_space(int &nd, + const py::ssize_t *const &shape, + std::vector const &src_strides, + std::vector const &dst_strides, + // output + std::vector &simplified_shape, + std::vector &simplified_src_strides, + std::vector &simplified_dst_strides, + py::ssize_t &src_offset, + py::ssize_t &dst_offset) +{ + if (nd > 1) { + // Simplify iteration space to reduce dimensionality + // and improve access pattern + simplified_shape.reserve(nd); + simplified_shape.insert(std::begin(simplified_shape), shape, + shape + nd); + assert(simplified_shape.size() == static_cast(nd)); + + simplified_src_strides.reserve(nd); + simplified_src_strides.insert(std::end(simplified_src_strides), + std::begin(src_strides), + std::end(src_strides)); + assert(simplified_src_strides.size() == static_cast(nd)); + + simplified_dst_strides.reserve(nd); + simplified_dst_strides.insert(std::end(simplified_dst_strides), + std::begin(dst_strides), + std::end(dst_strides)); + assert(simplified_dst_strides.size() == static_cast(nd)); + + int contracted_nd = simplify_iteration_two_strides( + nd, simplified_shape.data(), simplified_src_strides.data(), + simplified_dst_strides.data(), + src_offset, // modified by reference + dst_offset // modified by reference + ); + simplified_shape.resize(contracted_nd); + 
simplified_src_strides.resize(contracted_nd); + simplified_dst_strides.resize(contracted_nd); + + nd = contracted_nd; + } + else if (nd == 1) { + src_offset = 0; + dst_offset = 0; + // Populate vectors + simplified_shape.reserve(nd); + simplified_shape.push_back(shape[0]); + assert(simplified_shape.size() == static_cast(nd)); + + simplified_src_strides.reserve(nd); + simplified_dst_strides.reserve(nd); + + if (src_strides[0] < 0 && dst_strides[0] < 0) { + simplified_src_strides.push_back(-src_strides[0]); + simplified_dst_strides.push_back(-dst_strides[0]); + if (shape[0] > 1) { + src_offset += (shape[0] - 1) * src_strides[0]; + dst_offset += (shape[0] - 1) * dst_strides[0]; + } + } + else { + simplified_src_strides.push_back(src_strides[0]); + simplified_dst_strides.push_back(dst_strides[0]); + } + + assert(simplified_src_strides.size() == static_cast(nd)); + assert(simplified_dst_strides.size() == static_cast(nd)); + } +} + +void simplify_iteration_space_3( + int &nd, + const py::ssize_t *const &shape, + // src1 + std::vector const &src1_strides, + // src2 + std::vector const &src2_strides, + // dst + std::vector const &dst_strides, + // output + std::vector &simplified_shape, + std::vector &simplified_src1_strides, + std::vector &simplified_src2_strides, + std::vector &simplified_dst_strides, + py::ssize_t &src1_offset, + py::ssize_t &src2_offset, + py::ssize_t &dst_offset) +{ + if (nd > 1) { + // Simplify iteration space to reduce dimensionality + // and improve access pattern + simplified_shape.reserve(nd); + simplified_shape.insert(std::end(simplified_shape), shape, shape + nd); + assert(simplified_shape.size() == static_cast(nd)); + + simplified_src1_strides.reserve(nd); + simplified_src1_strides.insert(std::end(simplified_src1_strides), + std::begin(src1_strides), + std::end(src1_strides)); + assert(simplified_src1_strides.size() == static_cast(nd)); + + simplified_src2_strides.reserve(nd); + simplified_src2_strides.insert(std::end(simplified_src2_strides), + std::begin(src2_strides), + std::end(src2_strides)); + assert(simplified_src2_strides.size() == static_cast(nd)); + + simplified_dst_strides.reserve(nd); + simplified_dst_strides.insert(std::end(simplified_dst_strides), + std::begin(dst_strides), + std::end(dst_strides)); + assert(simplified_dst_strides.size() == static_cast(nd)); + + int contracted_nd = simplify_iteration_three_strides( + nd, simplified_shape.data(), simplified_src1_strides.data(), + simplified_src2_strides.data(), simplified_dst_strides.data(), + src1_offset, // modified by reference + src2_offset, // modified by reference + dst_offset // modified by reference + ); + simplified_shape.resize(contracted_nd); + simplified_src1_strides.resize(contracted_nd); + simplified_src2_strides.resize(contracted_nd); + simplified_dst_strides.resize(contracted_nd); + + nd = contracted_nd; + } + else if (nd == 1) { + src1_offset = 0; + src2_offset = 0; + dst_offset = 0; + // Populate vectors + simplified_shape.reserve(nd); + simplified_shape.push_back(shape[0]); + assert(simplified_shape.size() == static_cast(nd)); + + simplified_src1_strides.reserve(nd); + simplified_src2_strides.reserve(nd); + simplified_dst_strides.reserve(nd); + + if ((src1_strides[0] < 0) && (src2_strides[0] < 0) && + (dst_strides[0] < 0)) { + simplified_src1_strides.push_back(-src1_strides[0]); + simplified_src2_strides.push_back(-src2_strides[0]); + simplified_dst_strides.push_back(-dst_strides[0]); + if (shape[0] > 1) { + src1_offset += src1_strides[0] * (shape[0] - 1); + src2_offset += 
src2_strides[0] * (shape[0] - 1); + dst_offset += dst_strides[0] * (shape[0] - 1); + } + } + else { + simplified_src1_strides.push_back(src1_strides[0]); + simplified_src2_strides.push_back(src2_strides[0]); + simplified_dst_strides.push_back(dst_strides[0]); + } + + assert(simplified_src1_strides.size() == static_cast(nd)); + assert(simplified_src2_strides.size() == static_cast(nd)); + assert(simplified_dst_strides.size() == static_cast(nd)); + } +} +} // namespace dpnp::extensions::py_internal \ No newline at end of file diff --git a/dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.hpp b/dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.hpp index aa47831426c8..111050ae59a6 100644 --- a/dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.hpp +++ b/dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.hpp @@ -25,396 +25,37 @@ #pragma once -#include - -#include "dpctl4pybind11.hpp" #include -#include +#include namespace dpnp::extensions::py_internal { - namespace py = pybind11; -template -int simplify_iteration_two_strides(const int nd, - ShapeTy *shape, - StridesTy *strides1, - StridesTy *strides2, - StridesTy &disp1, - StridesTy &disp2) -{ - disp1 = StridesTy(0); - disp2 = StridesTy(0); - if (nd < 2) - return nd; - - std::vector pos(nd); - std::iota(pos.begin(), pos.end(), 0); - - std::stable_sort( - pos.begin(), pos.end(), [&strides1, &strides2, &shape](int i1, int i2) { - auto abs_str1_i1 = - (strides1[i1] < 0) ? -strides1[i1] : strides1[i1]; - auto abs_str1_i2 = - (strides1[i2] < 0) ? -strides1[i2] : strides1[i2]; - auto abs_str2_i1 = - (strides2[i1] < 0) ? -strides2[i1] : strides2[i1]; - auto abs_str2_i2 = - (strides2[i2] < 0) ? -strides2[i2] : strides2[i2]; - return (abs_str2_i1 > abs_str2_i2) || - (abs_str2_i1 == abs_str2_i2 && - (abs_str1_i1 > abs_str1_i2 || - (abs_str1_i1 == abs_str1_i2 && shape[i1] > shape[i2]))); - }); - - std::vector shape_w; - std::vector strides1_w; - std::vector strides2_w; - - bool contractable = true; - for (int i = 0; i < nd; ++i) { - auto p = pos[i]; - auto sh_p = shape[p]; - auto str1_p = strides1[p]; - auto str2_p = strides2[p]; - shape_w.push_back(sh_p); - if (str1_p <= 0 && str2_p <= 0 && std::min(str1_p, str2_p) < 0) { - disp1 += str1_p * (sh_p - 1); - str1_p = -str1_p; - disp2 += str2_p * (sh_p - 1); - str2_p = -str2_p; - } - if (str1_p < 0 || str2_p < 0) { - contractable = false; - } - strides1_w.push_back(str1_p); - strides2_w.push_back(str2_p); - } - - int nd_ = nd; - while (contractable) { - bool changed = false; - for (int i = 0; i + 1 < nd_; ++i) { - StridesTy str1 = strides1_w[i + 1]; - StridesTy str2 = strides2_w[i + 1]; - StridesTy jump1 = strides1_w[i] - (shape_w[i + 1] - 1) * str1; - StridesTy jump2 = strides2_w[i] - (shape_w[i + 1] - 1) * str2; - - if (jump1 == str1 && jump2 == str2) { - changed = true; - shape_w[i] *= shape_w[i + 1]; - for (int j = i; j < nd_; ++j) { - strides1_w[j] = strides1_w[j + 1]; - } - for (int j = i; j < nd_; ++j) { - strides2_w[j] = strides2_w[j + 1]; - } - for (int j = i + 1; j + 1 < nd_; ++j) { - shape_w[j] = shape_w[j + 1]; - } - --nd_; - break; - } - } - if (!changed) - break; - } - for (int i = 0; i < nd_; ++i) { - shape[i] = shape_w[i]; - } - for (int i = 0; i < nd_; ++i) { - strides1[i] = strides1_w[i]; - } - for (int i = 0; i < nd_; ++i) { - strides2[i] = strides2_w[i]; - } - - return nd_; -} - -template -int simplify_iteration_three_strides(const int nd, - ShapeTy *shape, - StridesTy *strides1, - StridesTy *strides2, - 
StridesTy *strides3, - StridesTy &disp1, - StridesTy &disp2, - StridesTy &disp3) -{ - disp1 = StridesTy(0); - disp2 = StridesTy(0); - if (nd < 2) - return nd; - - std::vector pos(nd); - std::iota(pos.begin(), pos.end(), 0); - - std::stable_sort(pos.begin(), pos.end(), - [&strides1, &strides2, &strides3, &shape](int i1, int i2) { - auto abs_str1_i1 = - (strides1[i1] < 0) ? -strides1[i1] : strides1[i1]; - auto abs_str1_i2 = - (strides1[i2] < 0) ? -strides1[i2] : strides1[i2]; - auto abs_str2_i1 = - (strides2[i1] < 0) ? -strides2[i1] : strides2[i1]; - auto abs_str2_i2 = - (strides2[i2] < 0) ? -strides2[i2] : strides2[i2]; - auto abs_str3_i1 = - (strides3[i1] < 0) ? -strides3[i1] : strides3[i1]; - auto abs_str3_i2 = - (strides3[i2] < 0) ? -strides3[i2] : strides3[i2]; - return (abs_str3_i1 > abs_str3_i2) || - ((abs_str3_i1 == abs_str3_i2) && - ((abs_str2_i1 > abs_str2_i2) || - ((abs_str2_i1 == abs_str2_i2) && - ((abs_str1_i1 > abs_str1_i2) || - ((abs_str1_i1 == abs_str1_i2) && - (shape[i1] > shape[i2])))))); - }); - - std::vector shape_w; - std::vector strides1_w; - std::vector strides2_w; - std::vector strides3_w; - - bool contractable = true; - for (int i = 0; i < nd; ++i) { - auto p = pos[i]; - auto sh_p = shape[p]; - auto str1_p = strides1[p]; - auto str2_p = strides2[p]; - auto str3_p = strides3[p]; - shape_w.push_back(sh_p); - if (str1_p <= 0 && str2_p <= 0 && str3_p <= 0 && - std::min({str1_p, str2_p, str3_p}) < 0) - { - disp1 += str1_p * (sh_p - 1); - str1_p = -str1_p; - disp2 += str2_p * (sh_p - 1); - str2_p = -str2_p; - disp3 += str3_p * (sh_p - 1); - str3_p = -str3_p; - } - if (str1_p < 0 || str2_p < 0 || str3_p < 0) { - contractable = false; - } - strides1_w.push_back(str1_p); - strides2_w.push_back(str2_p); - strides3_w.push_back(str3_p); - } - int nd_ = nd; - while (contractable) { - bool changed = false; - for (int i = 0; i + 1 < nd_; ++i) { - StridesTy str1 = strides1_w[i + 1]; - StridesTy str2 = strides2_w[i + 1]; - StridesTy str3 = strides3_w[i + 1]; - StridesTy jump1 = strides1_w[i] - (shape_w[i + 1] - 1) * str1; - StridesTy jump2 = strides2_w[i] - (shape_w[i + 1] - 1) * str2; - StridesTy jump3 = strides3_w[i] - (shape_w[i + 1] - 1) * str3; - - if (jump1 == str1 && jump2 == str2 && jump3 == str3) { - changed = true; - shape_w[i] *= shape_w[i + 1]; - for (int j = i; j < nd_; ++j) { - strides1_w[j] = strides1_w[j + 1]; - } - for (int j = i; j < nd_; ++j) { - strides2_w[j] = strides2_w[j + 1]; - } - for (int j = i; j < nd_; ++j) { - strides3_w[j] = strides3_w[j + 1]; - } - for (int j = i + 1; j + 1 < nd_; ++j) { - shape_w[j] = shape_w[j + 1]; - } - --nd_; - break; - } - } - if (!changed) - break; - } - for (int i = 0; i < nd_; ++i) { - shape[i] = shape_w[i]; - } - for (int i = 0; i < nd_; ++i) { - strides1[i] = strides1_w[i]; - } - for (int i = 0; i < nd_; ++i) { - strides2[i] = strides2_w[i]; - } - for (int i = 0; i < nd_; ++i) { - strides3[i] = strides3_w[i]; - } - - return nd_; -} - -void simplify_iteration_space(int &nd, - const py::ssize_t *const &shape, - std::vector const &src_strides, - std::vector const &dst_strides, - // output - std::vector &simplified_shape, - std::vector &simplified_src_strides, - std::vector &simplified_dst_strides, - py::ssize_t &src_offset, - py::ssize_t &dst_offset) -{ - if (nd > 1) { - // Simplify iteration space to reduce dimensionality - // and improve access pattern - simplified_shape.reserve(nd); - simplified_shape.insert(std::begin(simplified_shape), shape, - shape + nd); - assert(simplified_shape.size() == static_cast(nd)); - - 
simplified_src_strides.reserve(nd); - simplified_src_strides.insert(std::end(simplified_src_strides), - std::begin(src_strides), - std::end(src_strides)); - assert(simplified_src_strides.size() == static_cast(nd)); - - simplified_dst_strides.reserve(nd); - simplified_dst_strides.insert(std::end(simplified_dst_strides), - std::begin(dst_strides), - std::end(dst_strides)); - assert(simplified_dst_strides.size() == static_cast(nd)); - - int contracted_nd = simplify_iteration_two_strides( - nd, simplified_shape.data(), simplified_src_strides.data(), - simplified_dst_strides.data(), - src_offset, // modified by reference - dst_offset // modified by reference - ); - simplified_shape.resize(contracted_nd); - simplified_src_strides.resize(contracted_nd); - simplified_dst_strides.resize(contracted_nd); - - nd = contracted_nd; - } - else if (nd == 1) { - src_offset = 0; - dst_offset = 0; - // Populate vectors - simplified_shape.reserve(nd); - simplified_shape.push_back(shape[0]); - assert(simplified_shape.size() == static_cast(nd)); - - simplified_src_strides.reserve(nd); - simplified_dst_strides.reserve(nd); - - if (src_strides[0] < 0 && dst_strides[0] < 0) { - simplified_src_strides.push_back(-src_strides[0]); - simplified_dst_strides.push_back(-dst_strides[0]); - if (shape[0] > 1) { - src_offset += (shape[0] - 1) * src_strides[0]; - dst_offset += (shape[0] - 1) * dst_strides[0]; - } - } - else { - simplified_src_strides.push_back(src_strides[0]); - simplified_dst_strides.push_back(dst_strides[0]); - } - - assert(simplified_src_strides.size() == static_cast(nd)); - assert(simplified_dst_strides.size() == static_cast(nd)); - } -} - -void simplify_iteration_space_3( - int &nd, - const py::ssize_t *const &shape, - // src1 - std::vector const &src1_strides, - // src2 - std::vector const &src2_strides, - // dst - std::vector const &dst_strides, - // output - std::vector &simplified_shape, - std::vector &simplified_src1_strides, - std::vector &simplified_src2_strides, - std::vector &simplified_dst_strides, - py::ssize_t &src1_offset, - py::ssize_t &src2_offset, - py::ssize_t &dst_offset) -{ - if (nd > 1) { - // Simplify iteration space to reduce dimensionality - // and improve access pattern - simplified_shape.reserve(nd); - simplified_shape.insert(std::end(simplified_shape), shape, shape + nd); - assert(simplified_shape.size() == static_cast(nd)); - - simplified_src1_strides.reserve(nd); - simplified_src1_strides.insert(std::end(simplified_src1_strides), - std::begin(src1_strides), - std::end(src1_strides)); - assert(simplified_src1_strides.size() == static_cast(nd)); - - simplified_src2_strides.reserve(nd); - simplified_src2_strides.insert(std::end(simplified_src2_strides), - std::begin(src2_strides), - std::end(src2_strides)); - assert(simplified_src2_strides.size() == static_cast(nd)); - - simplified_dst_strides.reserve(nd); - simplified_dst_strides.insert(std::end(simplified_dst_strides), - std::begin(dst_strides), - std::end(dst_strides)); - assert(simplified_dst_strides.size() == static_cast(nd)); - - int contracted_nd = simplify_iteration_three_strides( - nd, simplified_shape.data(), simplified_src1_strides.data(), - simplified_src2_strides.data(), simplified_dst_strides.data(), - src1_offset, // modified by reference - src2_offset, // modified by reference - dst_offset // modified by reference - ); - simplified_shape.resize(contracted_nd); - simplified_src1_strides.resize(contracted_nd); - simplified_src2_strides.resize(contracted_nd); - simplified_dst_strides.resize(contracted_nd); - - nd = 
contracted_nd; - } - else if (nd == 1) { - src1_offset = 0; - src2_offset = 0; - dst_offset = 0; - // Populate vectors - simplified_shape.reserve(nd); - simplified_shape.push_back(shape[0]); - assert(simplified_shape.size() == static_cast(nd)); - - simplified_src1_strides.reserve(nd); - simplified_src2_strides.reserve(nd); - simplified_dst_strides.reserve(nd); - - if ((src1_strides[0] < 0) && (src2_strides[0] < 0) && - (dst_strides[0] < 0)) { - simplified_src1_strides.push_back(-src1_strides[0]); - simplified_src2_strides.push_back(-src2_strides[0]); - simplified_dst_strides.push_back(-dst_strides[0]); - if (shape[0] > 1) { - src1_offset += src1_strides[0] * (shape[0] - 1); - src2_offset += src2_strides[0] * (shape[0] - 1); - dst_offset += dst_strides[0] * (shape[0] - 1); - } - } - else { - simplified_src1_strides.push_back(src1_strides[0]); - simplified_src2_strides.push_back(src2_strides[0]); - simplified_dst_strides.push_back(dst_strides[0]); - } - - assert(simplified_src1_strides.size() == static_cast(nd)); - assert(simplified_src2_strides.size() == static_cast(nd)); - assert(simplified_dst_strides.size() == static_cast(nd)); - } -} +void simplify_iteration_space(int &, + const py::ssize_t *const &, + std::vector const &, + std::vector const &, + std::vector &, + std::vector &, + std::vector &, + py::ssize_t &, + py::ssize_t &); + +void simplify_iteration_space_3(int &, + const py::ssize_t *const &, + // src1 + std::vector const &, + // src2 + std::vector const &, + // dst + std::vector const &, + // output + std::vector &, + std::vector &, + std::vector &, + std::vector &, + py::ssize_t &, + py::ssize_t &, + py::ssize_t &); } // namespace dpnp::extensions::py_internal diff --git a/dpnp/backend/extensions/vm/CMakeLists.txt b/dpnp/backend/extensions/vm/CMakeLists.txt index 3c30d27cc9ab..fccbab0cc5a0 100644 --- a/dpnp/backend/extensions/vm/CMakeLists.txt +++ b/dpnp/backend/extensions/vm/CMakeLists.txt @@ -28,6 +28,9 @@ set(_elementwise_sources ) set(_module_src + # TODO: remove sources from `elementwise_functions` folder + ${CMAKE_CURRENT_SOURCE_DIR}/../elementwise_functions/elementwise_functions_type_utils.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../elementwise_functions/simplify_iteration_space.cpp ${CMAKE_CURRENT_SOURCE_DIR}/vm_py.cpp ${_elementwise_sources} ) From 7eb99d84e3545aec109f1207d07be40a2dbefb5a Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 5 Jun 2024 13:14:39 +0200 Subject: [PATCH 06/35] Separated implementation of acos function --- .../simplify_iteration_space.cpp | 2 +- dpnp/backend/extensions/vm/CMakeLists.txt | 1 + dpnp/backend/extensions/vm/abs.hpp | 1 - dpnp/backend/extensions/vm/acos.cpp | 140 ++++++++++++++++++ dpnp/backend/extensions/vm/acos.hpp | 54 +------ dpnp/backend/extensions/vm/types_matrix.hpp | 17 --- dpnp/backend/extensions/vm/vm_py.cpp | 30 +--- 7 files changed, 148 insertions(+), 97 deletions(-) create mode 100644 dpnp/backend/extensions/vm/acos.cpp diff --git a/dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.cpp b/dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.cpp index bb030fd80dd1..4bcc8688c173 100644 --- a/dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.cpp +++ b/dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.cpp @@ -414,4 +414,4 @@ void simplify_iteration_space_3( assert(simplified_dst_strides.size() == static_cast(nd)); } } -} // namespace dpnp::extensions::py_internal \ No newline at end of file +} // namespace dpnp::extensions::py_internal diff --git 
a/dpnp/backend/extensions/vm/CMakeLists.txt b/dpnp/backend/extensions/vm/CMakeLists.txt index fccbab0cc5a0..727bc9ede11d 100644 --- a/dpnp/backend/extensions/vm/CMakeLists.txt +++ b/dpnp/backend/extensions/vm/CMakeLists.txt @@ -25,6 +25,7 @@ set(_elementwise_sources ${CMAKE_CURRENT_SOURCE_DIR}/abs.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/acos.cpp ) set(_module_src diff --git a/dpnp/backend/extensions/vm/abs.hpp b/dpnp/backend/extensions/vm/abs.hpp index 9a76a89b97c3..9e074bc1ac88 100644 --- a/dpnp/backend/extensions/vm/abs.hpp +++ b/dpnp/backend/extensions/vm/abs.hpp @@ -26,7 +26,6 @@ #pragma once #include -#include namespace py = pybind11; diff --git a/dpnp/backend/extensions/vm/acos.cpp b/dpnp/backend/extensions/vm/acos.cpp new file mode 100644 index 000000000000..81044a24bc27 --- /dev/null +++ b/dpnp/backend/extensions/vm/acos.cpp @@ -0,0 +1,140 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include +#include + +#include "dpctl4pybind11.hpp" + +#include "acos.hpp" +#include "common.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::vm +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; +namespace vm_ext = dpnp::backend::ext::vm; + +namespace impl +{ +// OneMKL namespace with VM functions +namespace mkl_vm = oneapi::mkl::vm; + +/** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::acos function. 
+ * + * @tparam T Type of input vector `a` and of result vector `y`. + */ +template +struct OutputType +{ + using value_type = typename std::disjunction< + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +static sycl::event acos_contig_impl(sycl::queue &exec_q, + std::size_t in_n, + const char *in_a, + char *out_y, + const std::vector &depends) +{ + tu_ns::validate_type_for_device(exec_q); + + std::int64_t n = static_cast(in_n); + const T *a = reinterpret_cast(in_a); + + using resTy = typename OutputType::value_type; + resTy *y = reinterpret_cast(out_y); + + return mkl_vm::acos(exec_q, + n, // number of elements to be calculated + a, // pointer `a` containing input vector of size n + y, // pointer `y` to the output vector of size n + depends); +} + +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +static int output_typeid_vector[td_ns::num_types]; +static unary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t strided_dispatch_vector[td_ns::num_types]; + +MACRO_POPULATE_DISPATCH_VECTORS(acos); +} // namespace impl + +void init_acos(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + impl::populate_dispatch_vectors(); + using impl::contig_dispatch_vector; + using impl::output_typeid_vector; + using impl::strided_dispatch_vector; + + auto acos_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, + const event_vecT &depends = {}) { + return py_int::py_unary_ufunc( + src, dst, exec_q, depends, output_typeid_vector, + contig_dispatch_vector, strided_dispatch_vector); + }; + m.def("_acos", acos_pyapi, + "Call `acos` function from OneMKL VM library to compute " + "inverse cosine of vector elements", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), + py::arg("depends") = py::list()); + + auto acos_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, + arrayT dst) { + return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, + contig_dispatch_vector); + }; + m.def("_mkl_acos_to_call", acos_need_to_call_pyapi, + "Check input arguments to answer if `acos` function from " + "OneMKL VM library can be used", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); +} + +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/acos.hpp b/dpnp/backend/extensions/vm/acos.hpp index 029a9d9c886a..2bfb2a71d6b8 100644 --- a/dpnp/backend/extensions/vm/acos.hpp +++ b/dpnp/backend/extensions/vm/acos.hpp @@ -25,55 +25,11 @@ #pragma once -#include +#include -#include "common.hpp" -#include "types_matrix.hpp" +namespace py = pybind11; -namespace dpnp +namespace dpnp::extensions::vm { -namespace backend -{ -namespace ext -{ -namespace vm -{ -template -sycl::event acos_contig_impl(sycl::queue exec_q, - const std::int64_t n, - const char *in_a, - char *out_y, - const std::vector &depends) -{ - type_utils::validate_type_for_device(exec_q); - - const T *a = reinterpret_cast(in_a); - using resTy = typename types::AcosOutputType::value_type; - resTy *y = reinterpret_cast(out_y); - - return mkl_vm::acos(exec_q, - n, // number of elements to be calculated - a, // pointer `a` containing input vector of size n - y, // pointer `y` to the output vector of size n - depends); -} - -template -struct AcosContigFactory -{ - fnT get() - { - if constexpr (std::is_same_v< - typename types::AcosOutputType::value_type, void>) - { - return 
nullptr; - } - else { - return acos_contig_impl; - } - } -}; -} // namespace vm -} // namespace ext -} // namespace backend -} // namespace dpnp +void init_acos(py::module_ m); +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/types_matrix.hpp b/dpnp/backend/extensions/vm/types_matrix.hpp index 656fc4b99c48..3fd7d2326887 100644 --- a/dpnp/backend/extensions/vm/types_matrix.hpp +++ b/dpnp/backend/extensions/vm/types_matrix.hpp @@ -43,23 +43,6 @@ namespace vm { namespace types { -/** - * @brief A factory to define pairs of supported types for which - * MKL VM library provides support in oneapi::mkl::vm::acos function. - * - * @tparam T Type of input vector `a` and of result vector `y`. - */ -template -struct AcosOutputType -{ - using value_type = typename std::disjunction< - dpctl_td_ns::TypeMapResultEntry>, - dpctl_td_ns::TypeMapResultEntry>, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::DefaultResultEntry>::result_type; -}; - /** * @brief A factory to define pairs of supported types for which * MKL VM library provides support in oneapi::mkl::vm::acosh function. diff --git a/dpnp/backend/extensions/vm/vm_py.cpp b/dpnp/backend/extensions/vm/vm_py.cpp index c94fab5d262c..6b5098f23f9e 100644 --- a/dpnp/backend/extensions/vm/vm_py.cpp +++ b/dpnp/backend/extensions/vm/vm_py.cpp @@ -75,7 +75,6 @@ namespace vm_ns = dpnp::extensions::vm; using vm_ext::binary_impl_fn_ptr_t; using vm_ext::unary_impl_fn_ptr_t; -static unary_impl_fn_ptr_t acos_dispatch_vector[dpctl_td_ns::num_types]; static unary_impl_fn_ptr_t acosh_dispatch_vector[dpctl_td_ns::num_types]; static binary_impl_fn_ptr_t add_dispatch_vector[dpctl_td_ns::num_types]; static unary_impl_fn_ptr_t asin_dispatch_vector[dpctl_td_ns::num_types]; @@ -116,34 +115,7 @@ PYBIND11_MODULE(_vm_impl, m) using event_vecT = std::vector; vm_ns::init_abs(m); - - // UnaryUfunc: ==== Acos(x) ==== - { - vm_ext::init_ufunc_dispatch_vector( - acos_dispatch_vector); - - auto acos_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { - return vm_ext::unary_ufunc(exec_q, src, dst, depends, - acos_dispatch_vector); - }; - m.def("_acos", acos_pyapi, - "Call `acos` function from OneMKL VM library to compute " - "inverse cosine of vector elements", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), - py::arg("depends") = py::list()); - - auto acos_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { - return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, - acos_dispatch_vector); - }; - m.def("_mkl_acos_to_call", acos_need_to_call_pyapi, - "Check input arguments to answer if `acos` function from " - "OneMKL VM library can be used", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); - } + vm_ns::init_acos(m); // UnaryUfunc: ==== Acosh(x) ==== { From 2418e84d17bbc21998b961337ca8dcc41137939a Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 5 Jun 2024 13:20:26 +0200 Subject: [PATCH 07/35] Separated implementation of acosh function --- dpnp/backend/extensions/vm/CMakeLists.txt | 1 + dpnp/backend/extensions/vm/acosh.cpp | 140 ++++++++++++++++++++ dpnp/backend/extensions/vm/acosh.hpp | 54 +------- dpnp/backend/extensions/vm/types_matrix.hpp | 17 --- dpnp/backend/extensions/vm/vm_py.cpp | 30 +---- 5 files changed, 147 insertions(+), 95 deletions(-) create mode 100644 dpnp/backend/extensions/vm/acosh.cpp diff --git a/dpnp/backend/extensions/vm/CMakeLists.txt b/dpnp/backend/extensions/vm/CMakeLists.txt index 727bc9ede11d..588115e710be 
100644 --- a/dpnp/backend/extensions/vm/CMakeLists.txt +++ b/dpnp/backend/extensions/vm/CMakeLists.txt @@ -26,6 +26,7 @@ set(_elementwise_sources ${CMAKE_CURRENT_SOURCE_DIR}/abs.cpp ${CMAKE_CURRENT_SOURCE_DIR}/acos.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/acosh.cpp ) set(_module_src diff --git a/dpnp/backend/extensions/vm/acosh.cpp b/dpnp/backend/extensions/vm/acosh.cpp new file mode 100644 index 000000000000..40022326477f --- /dev/null +++ b/dpnp/backend/extensions/vm/acosh.cpp @@ -0,0 +1,140 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include +#include + +#include "dpctl4pybind11.hpp" + +#include "acosh.hpp" +#include "common.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::vm +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; +namespace vm_ext = dpnp::backend::ext::vm; + +namespace impl +{ +// OneMKL namespace with VM functions +namespace mkl_vm = oneapi::mkl::vm; + +/** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::acosh function. + * + * @tparam T Type of input vector `a` and of result vector `y`. 
+ */ +template +struct OutputType +{ + using value_type = typename std::disjunction< + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +static sycl::event acosh_contig_impl(sycl::queue &exec_q, + std::size_t in_n, + const char *in_a, + char *out_y, + const std::vector &depends) +{ + tu_ns::validate_type_for_device(exec_q); + + std::int64_t n = static_cast(in_n); + const T *a = reinterpret_cast(in_a); + + using resTy = typename OutputType::value_type; + resTy *y = reinterpret_cast(out_y); + + return mkl_vm::acosh(exec_q, + n, // number of elements to be calculated + a, // pointer `a` containing input vector of size n + y, // pointer `y` to the output vector of size n + depends); +} + +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +static int output_typeid_vector[td_ns::num_types]; +static unary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t strided_dispatch_vector[td_ns::num_types]; + +MACRO_POPULATE_DISPATCH_VECTORS(acosh); +} // namespace impl + +void init_acosh(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + impl::populate_dispatch_vectors(); + using impl::contig_dispatch_vector; + using impl::output_typeid_vector; + using impl::strided_dispatch_vector; + + auto acosh_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, + const event_vecT &depends = {}) { + return py_int::py_unary_ufunc( + src, dst, exec_q, depends, output_typeid_vector, + contig_dispatch_vector, strided_dispatch_vector); + }; + m.def("_acosh", acosh_pyapi, + "Call `acosh` function from OneMKL VM library to compute " + "inverse cosine of vector elements", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), + py::arg("depends") = py::list()); + + auto acosh_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, + arrayT dst) { + return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, + contig_dispatch_vector); + }; + m.def("_mkl_acosh_to_call", acosh_need_to_call_pyapi, + "Check input arguments to answer if `acosh` function from " + "OneMKL VM library can be used", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); +} + +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/acosh.hpp b/dpnp/backend/extensions/vm/acosh.hpp index 9f86ae589cf5..6cfde12cbcb3 100644 --- a/dpnp/backend/extensions/vm/acosh.hpp +++ b/dpnp/backend/extensions/vm/acosh.hpp @@ -25,55 +25,11 @@ #pragma once -#include +#include -#include "common.hpp" -#include "types_matrix.hpp" +namespace py = pybind11; -namespace dpnp +namespace dpnp::extensions::vm { -namespace backend -{ -namespace ext -{ -namespace vm -{ -template -sycl::event acosh_contig_impl(sycl::queue exec_q, - const std::int64_t n, - const char *in_a, - char *out_y, - const std::vector &depends) -{ - type_utils::validate_type_for_device(exec_q); - - const T *a = reinterpret_cast(in_a); - using resTy = typename types::AcoshOutputType::value_type; - resTy *y = reinterpret_cast(out_y); - - return mkl_vm::acosh(exec_q, - n, // number of elements to be calculated - a, // pointer `a` containing input vector of size n - y, // pointer `y` to the output vector of size n - depends); -} - -template -struct AcoshContigFactory -{ - fnT get() - { - if constexpr (std::is_same_v< - typename types::AcoshOutputType::value_type, void>) - { - return nullptr; - } - else { - return 
acosh_contig_impl; - } - } -}; -} // namespace vm -} // namespace ext -} // namespace backend -} // namespace dpnp +void init_acosh(py::module_ m); +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/types_matrix.hpp b/dpnp/backend/extensions/vm/types_matrix.hpp index 3fd7d2326887..e31c3e92f915 100644 --- a/dpnp/backend/extensions/vm/types_matrix.hpp +++ b/dpnp/backend/extensions/vm/types_matrix.hpp @@ -43,23 +43,6 @@ namespace vm { namespace types { -/** - * @brief A factory to define pairs of supported types for which - * MKL VM library provides support in oneapi::mkl::vm::acosh function. - * - * @tparam T Type of input vector `a` and of result vector `y`. - */ -template -struct AcoshOutputType -{ - using value_type = typename std::disjunction< - dpctl_td_ns::TypeMapResultEntry>, - dpctl_td_ns::TypeMapResultEntry>, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::DefaultResultEntry>::result_type; -}; - /** * @brief A factory to define pairs of supported types for which * MKL VM library provides support in oneapi::mkl::vm::add function. diff --git a/dpnp/backend/extensions/vm/vm_py.cpp b/dpnp/backend/extensions/vm/vm_py.cpp index 6b5098f23f9e..7de367c14a90 100644 --- a/dpnp/backend/extensions/vm/vm_py.cpp +++ b/dpnp/backend/extensions/vm/vm_py.cpp @@ -75,7 +75,6 @@ namespace vm_ns = dpnp::extensions::vm; using vm_ext::binary_impl_fn_ptr_t; using vm_ext::unary_impl_fn_ptr_t; -static unary_impl_fn_ptr_t acosh_dispatch_vector[dpctl_td_ns::num_types]; static binary_impl_fn_ptr_t add_dispatch_vector[dpctl_td_ns::num_types]; static unary_impl_fn_ptr_t asin_dispatch_vector[dpctl_td_ns::num_types]; static unary_impl_fn_ptr_t asinh_dispatch_vector[dpctl_td_ns::num_types]; @@ -116,34 +115,7 @@ PYBIND11_MODULE(_vm_impl, m) vm_ns::init_abs(m); vm_ns::init_acos(m); - - // UnaryUfunc: ==== Acosh(x) ==== - { - vm_ext::init_ufunc_dispatch_vector( - acosh_dispatch_vector); - - auto acosh_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { - return vm_ext::unary_ufunc(exec_q, src, dst, depends, - acosh_dispatch_vector); - }; - m.def("_acosh", acosh_pyapi, - "Call `acosh` function from OneMKL VM library to compute " - "inverse cosine of vector elements", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), - py::arg("depends") = py::list()); - - auto acosh_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { - return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, - acosh_dispatch_vector); - }; - m.def("_mkl_acosh_to_call", acosh_need_to_call_pyapi, - "Check input arguments to answer if `acosh` function from " - "OneMKL VM library can be used", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); - } + vm_ns::init_acosh(m); // BinaryUfunc: ==== Add(x1, x2) ==== { From 774833146959b232f98a2cfe1d9dfebecc42cd6c Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 5 Jun 2024 14:07:28 +0200 Subject: [PATCH 08/35] Use function to simplify strides from dpctl tensor headers --- .../elementwise_functions_type_utils.cpp | 1 + .../simplify_iteration_space.cpp | 230 +----------------- 2 files changed, 10 insertions(+), 221 deletions(-) diff --git a/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.cpp b/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.cpp index edb6b6118804..3f88f735a710 100644 --- a/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.cpp +++ 
b/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.cpp @@ -24,6 +24,7 @@ //***************************************************************************** #include "dpctl4pybind11.hpp" + #include #include #include diff --git a/dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.cpp b/dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.cpp index 4bcc8688c173..a3ab0b99b7a2 100644 --- a/dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.cpp +++ b/dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.cpp @@ -23,232 +23,20 @@ // THE POSSIBILITY OF SUCH DAMAGE. //***************************************************************************** -#include "simplify_iteration_space.hpp" #include "dpctl4pybind11.hpp" -#include "utils/strided_iters.hpp" + #include #include -namespace dpnp::extensions::py_internal -{ -namespace py = pybind11; - -template -int simplify_iteration_two_strides(const int nd, - ShapeTy *shape, - StridesTy *strides1, - StridesTy *strides2, - StridesTy &disp1, - StridesTy &disp2) -{ - disp1 = StridesTy(0); - disp2 = StridesTy(0); - if (nd < 2) - return nd; - - std::vector pos(nd); - std::iota(pos.begin(), pos.end(), 0); - - std::stable_sort( - pos.begin(), pos.end(), [&strides1, &strides2, &shape](int i1, int i2) { - auto abs_str1_i1 = - (strides1[i1] < 0) ? -strides1[i1] : strides1[i1]; - auto abs_str1_i2 = - (strides1[i2] < 0) ? -strides1[i2] : strides1[i2]; - auto abs_str2_i1 = - (strides2[i1] < 0) ? -strides2[i1] : strides2[i1]; - auto abs_str2_i2 = - (strides2[i2] < 0) ? -strides2[i2] : strides2[i2]; - return (abs_str2_i1 > abs_str2_i2) || - (abs_str2_i1 == abs_str2_i2 && - (abs_str1_i1 > abs_str1_i2 || - (abs_str1_i1 == abs_str1_i2 && shape[i1] > shape[i2]))); - }); - - std::vector shape_w; - std::vector strides1_w; - std::vector strides2_w; - - bool contractable = true; - for (int i = 0; i < nd; ++i) { - auto p = pos[i]; - auto sh_p = shape[p]; - auto str1_p = strides1[p]; - auto str2_p = strides2[p]; - shape_w.push_back(sh_p); - if (str1_p <= 0 && str2_p <= 0 && std::min(str1_p, str2_p) < 0) { - disp1 += str1_p * (sh_p - 1); - str1_p = -str1_p; - disp2 += str2_p * (sh_p - 1); - str2_p = -str2_p; - } - if (str1_p < 0 || str2_p < 0) { - contractable = false; - } - strides1_w.push_back(str1_p); - strides2_w.push_back(str2_p); - } - - int nd_ = nd; - while (contractable) { - bool changed = false; - for (int i = 0; i + 1 < nd_; ++i) { - StridesTy str1 = strides1_w[i + 1]; - StridesTy str2 = strides2_w[i + 1]; - StridesTy jump1 = strides1_w[i] - (shape_w[i + 1] - 1) * str1; - StridesTy jump2 = strides2_w[i] - (shape_w[i + 1] - 1) * str2; - - if (jump1 == str1 && jump2 == str2) { - changed = true; - shape_w[i] *= shape_w[i + 1]; - for (int j = i; j < nd_; ++j) { - strides1_w[j] = strides1_w[j + 1]; - } - for (int j = i; j < nd_; ++j) { - strides2_w[j] = strides2_w[j + 1]; - } - for (int j = i + 1; j + 1 < nd_; ++j) { - shape_w[j] = shape_w[j + 1]; - } - --nd_; - break; - } - } - if (!changed) - break; - } - for (int i = 0; i < nd_; ++i) { - shape[i] = shape_w[i]; - } - for (int i = 0; i < nd_; ++i) { - strides1[i] = strides1_w[i]; - } - for (int i = 0; i < nd_; ++i) { - strides2[i] = strides2_w[i]; - } +#include "simplify_iteration_space.hpp" - return nd_; -} +// dpctl tensor headers +#include "utils/strided_iters.hpp" -template -int simplify_iteration_three_strides(const int nd, - ShapeTy *shape, - StridesTy *strides1, - StridesTy *strides2, - StridesTy *strides3, - StridesTy 
&disp1, - StridesTy &disp2, - StridesTy &disp3) +namespace dpnp::extensions::py_internal { - disp1 = StridesTy(0); - disp2 = StridesTy(0); - if (nd < 2) - return nd; - - std::vector pos(nd); - std::iota(pos.begin(), pos.end(), 0); - - std::stable_sort(pos.begin(), pos.end(), - [&strides1, &strides2, &strides3, &shape](int i1, int i2) { - auto abs_str1_i1 = - (strides1[i1] < 0) ? -strides1[i1] : strides1[i1]; - auto abs_str1_i2 = - (strides1[i2] < 0) ? -strides1[i2] : strides1[i2]; - auto abs_str2_i1 = - (strides2[i1] < 0) ? -strides2[i1] : strides2[i1]; - auto abs_str2_i2 = - (strides2[i2] < 0) ? -strides2[i2] : strides2[i2]; - auto abs_str3_i1 = - (strides3[i1] < 0) ? -strides3[i1] : strides3[i1]; - auto abs_str3_i2 = - (strides3[i2] < 0) ? -strides3[i2] : strides3[i2]; - return (abs_str3_i1 > abs_str3_i2) || - ((abs_str3_i1 == abs_str3_i2) && - ((abs_str2_i1 > abs_str2_i2) || - ((abs_str2_i1 == abs_str2_i2) && - ((abs_str1_i1 > abs_str1_i2) || - ((abs_str1_i1 == abs_str1_i2) && - (shape[i1] > shape[i2])))))); - }); - - std::vector shape_w; - std::vector strides1_w; - std::vector strides2_w; - std::vector strides3_w; - - bool contractable = true; - for (int i = 0; i < nd; ++i) { - auto p = pos[i]; - auto sh_p = shape[p]; - auto str1_p = strides1[p]; - auto str2_p = strides2[p]; - auto str3_p = strides3[p]; - shape_w.push_back(sh_p); - if (str1_p <= 0 && str2_p <= 0 && str3_p <= 0 && - std::min({str1_p, str2_p, str3_p}) < 0) - { - disp1 += str1_p * (sh_p - 1); - str1_p = -str1_p; - disp2 += str2_p * (sh_p - 1); - str2_p = -str2_p; - disp3 += str3_p * (sh_p - 1); - str3_p = -str3_p; - } - if (str1_p < 0 || str2_p < 0 || str3_p < 0) { - contractable = false; - } - strides1_w.push_back(str1_p); - strides2_w.push_back(str2_p); - strides3_w.push_back(str3_p); - } - int nd_ = nd; - while (contractable) { - bool changed = false; - for (int i = 0; i + 1 < nd_; ++i) { - StridesTy str1 = strides1_w[i + 1]; - StridesTy str2 = strides2_w[i + 1]; - StridesTy str3 = strides3_w[i + 1]; - StridesTy jump1 = strides1_w[i] - (shape_w[i + 1] - 1) * str1; - StridesTy jump2 = strides2_w[i] - (shape_w[i + 1] - 1) * str2; - StridesTy jump3 = strides3_w[i] - (shape_w[i + 1] - 1) * str3; - - if (jump1 == str1 && jump2 == str2 && jump3 == str3) { - changed = true; - shape_w[i] *= shape_w[i + 1]; - for (int j = i; j < nd_; ++j) { - strides1_w[j] = strides1_w[j + 1]; - } - for (int j = i; j < nd_; ++j) { - strides2_w[j] = strides2_w[j + 1]; - } - for (int j = i; j < nd_; ++j) { - strides3_w[j] = strides3_w[j + 1]; - } - for (int j = i + 1; j + 1 < nd_; ++j) { - shape_w[j] = shape_w[j + 1]; - } - --nd_; - break; - } - } - if (!changed) - break; - } - for (int i = 0; i < nd_; ++i) { - shape[i] = shape_w[i]; - } - for (int i = 0; i < nd_; ++i) { - strides1[i] = strides1_w[i]; - } - for (int i = 0; i < nd_; ++i) { - strides2[i] = strides2_w[i]; - } - for (int i = 0; i < nd_; ++i) { - strides3[i] = strides3_w[i]; - } - - return nd_; -} +namespace py = pybind11; +namespace st_ns = dpctl::tensor::strides; void simplify_iteration_space(int &nd, const py::ssize_t *const &shape, @@ -281,7 +69,7 @@ void simplify_iteration_space(int &nd, std::end(dst_strides)); assert(simplified_dst_strides.size() == static_cast(nd)); - int contracted_nd = simplify_iteration_two_strides( + int contracted_nd = st_ns::simplify_iteration_two_strides( nd, simplified_shape.data(), simplified_src_strides.data(), simplified_dst_strides.data(), src_offset, // modified by reference @@ -365,7 +153,7 @@ void simplify_iteration_space_3( 
std::end(dst_strides)); assert(simplified_dst_strides.size() == static_cast(nd)); - int contracted_nd = simplify_iteration_three_strides( + int contracted_nd = st_ns::simplify_iteration_three_strides( nd, simplified_shape.data(), simplified_src1_strides.data(), simplified_src2_strides.data(), simplified_dst_strides.data(), src1_offset, // modified by reference From f9fbbce825305fdba7a14ab8d867a3a06ba48223 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 5 Jun 2024 15:26:25 +0200 Subject: [PATCH 09/35] PoC to decouple add implementation to separate source file --- dpnp/backend/extensions/vm/CMakeLists.txt | 1 + dpnp/backend/extensions/vm/abs.cpp | 6 +- dpnp/backend/extensions/vm/acos.cpp | 6 +- dpnp/backend/extensions/vm/acosh.cpp | 6 +- dpnp/backend/extensions/vm/add.cpp | 171 ++++++++++++++++++++ dpnp/backend/extensions/vm/add.hpp | 57 +------ dpnp/backend/extensions/vm/common.hpp | 79 ++++++--- dpnp/backend/extensions/vm/types_matrix.hpp | 25 --- dpnp/backend/extensions/vm/vm_py.cpp | 32 +--- 9 files changed, 243 insertions(+), 140 deletions(-) create mode 100644 dpnp/backend/extensions/vm/add.cpp diff --git a/dpnp/backend/extensions/vm/CMakeLists.txt b/dpnp/backend/extensions/vm/CMakeLists.txt index 588115e710be..ca0b46c5909c 100644 --- a/dpnp/backend/extensions/vm/CMakeLists.txt +++ b/dpnp/backend/extensions/vm/CMakeLists.txt @@ -27,6 +27,7 @@ set(_elementwise_sources ${CMAKE_CURRENT_SOURCE_DIR}/abs.cpp ${CMAKE_CURRENT_SOURCE_DIR}/acos.cpp ${CMAKE_CURRENT_SOURCE_DIR}/acosh.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/add.cpp ) set(_module_src diff --git a/dpnp/backend/extensions/vm/abs.cpp b/dpnp/backend/extensions/vm/abs.cpp index c3e5a13e8629..dc590add1ee4 100644 --- a/dpnp/backend/extensions/vm/abs.cpp +++ b/dpnp/backend/extensions/vm/abs.cpp @@ -99,7 +99,6 @@ using ew_cmn_ns::unary_strided_impl_fn_ptr_t; static int output_typeid_vector[td_ns::num_types]; static unary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t strided_dispatch_vector[td_ns::num_types]; MACRO_POPULATE_DISPATCH_VECTORS(abs); } // namespace impl @@ -112,13 +111,14 @@ void init_abs(py::module_ m) impl::populate_dispatch_vectors(); using impl::contig_dispatch_vector; using impl::output_typeid_vector; - using impl::strided_dispatch_vector; auto abs_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, const event_vecT &depends = {}) { return py_int::py_unary_ufunc( src, dst, exec_q, depends, output_typeid_vector, - contig_dispatch_vector, strided_dispatch_vector); + contig_dispatch_vector, + // no support of strided implementation in OneMKL + td_ns::NullPtrVector{}); }; m.def("_abs", abs_pyapi, "Call `abs` function from OneMKL VM library to compute " diff --git a/dpnp/backend/extensions/vm/acos.cpp b/dpnp/backend/extensions/vm/acos.cpp index 81044a24bc27..0bb6653abbc8 100644 --- a/dpnp/backend/extensions/vm/acos.cpp +++ b/dpnp/backend/extensions/vm/acos.cpp @@ -99,7 +99,6 @@ using ew_cmn_ns::unary_strided_impl_fn_ptr_t; static int output_typeid_vector[td_ns::num_types]; static unary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t strided_dispatch_vector[td_ns::num_types]; MACRO_POPULATE_DISPATCH_VECTORS(acos); } // namespace impl @@ -112,13 +111,14 @@ void init_acos(py::module_ m) impl::populate_dispatch_vectors(); using impl::contig_dispatch_vector; using impl::output_typeid_vector; - using impl::strided_dispatch_vector; auto acos_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, const event_vecT &depends = {}) { 
return py_int::py_unary_ufunc( src, dst, exec_q, depends, output_typeid_vector, - contig_dispatch_vector, strided_dispatch_vector); + contig_dispatch_vector, + // no support of strided implementation in OneMKL + td_ns::NullPtrVector{}); }; m.def("_acos", acos_pyapi, "Call `acos` function from OneMKL VM library to compute " diff --git a/dpnp/backend/extensions/vm/acosh.cpp b/dpnp/backend/extensions/vm/acosh.cpp index 40022326477f..cc9274d7b706 100644 --- a/dpnp/backend/extensions/vm/acosh.cpp +++ b/dpnp/backend/extensions/vm/acosh.cpp @@ -99,7 +99,6 @@ using ew_cmn_ns::unary_strided_impl_fn_ptr_t; static int output_typeid_vector[td_ns::num_types]; static unary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t strided_dispatch_vector[td_ns::num_types]; MACRO_POPULATE_DISPATCH_VECTORS(acosh); } // namespace impl @@ -112,13 +111,14 @@ void init_acosh(py::module_ m) impl::populate_dispatch_vectors(); using impl::contig_dispatch_vector; using impl::output_typeid_vector; - using impl::strided_dispatch_vector; auto acosh_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, const event_vecT &depends = {}) { return py_int::py_unary_ufunc( src, dst, exec_q, depends, output_typeid_vector, - contig_dispatch_vector, strided_dispatch_vector); + contig_dispatch_vector, + // no support of strided implementation in OneMKL + td_ns::NullPtrVector{}); }; m.def("_acosh", acosh_pyapi, "Call `acosh` function from OneMKL VM library to compute " diff --git a/dpnp/backend/extensions/vm/add.cpp b/dpnp/backend/extensions/vm/add.cpp new file mode 100644 index 000000000000..195a17c4014d --- /dev/null +++ b/dpnp/backend/extensions/vm/add.cpp @@ -0,0 +1,171 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#include +#include + +#include "dpctl4pybind11.hpp" + +#include "add.hpp" +#include "common.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::vm +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; +namespace vm_ext = dpnp::backend::ext::vm; + +namespace impl +{ +// OneMKL namespace with VM functions +namespace mkl_vm = oneapi::mkl::vm; + +/** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::add function. + * + * @tparam T Type of input vector `a` and of result vector `y`. + */ +template +struct OutputType +{ + using value_type = typename std::disjunction< + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + std::complex>, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + std::complex>, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +static sycl::event add_contig_impl(sycl::queue &exec_q, + std::size_t in_n, + const char *in_a, + ssize_t a_offset, + const char *in_b, + ssize_t b_offset, + char *out_y, + ssize_t out_offset, + const std::vector &depends) +{ + tu_ns::validate_type_for_device(exec_q); + tu_ns::validate_type_for_device(exec_q); + + if ((a_offset != 0) || (b_offset != 0) || (out_offset != 0)) { + throw std::runtime_error("Arrays offsets have to be equals to 0"); + } + + std::int64_t n = static_cast(in_n); + const T1 *a = reinterpret_cast(in_a); + const T2 *b = reinterpret_cast(in_b); + + using resTy = typename OutputType::value_type; + resTy *y = reinterpret_cast(out_y); + + return mkl_vm::add(exec_q, + n, // number of elements to be calculated + a, // pointer `a` containing 1st input vector of size n + b, // pointer `b` containing 2nd input vector of size n + y, // pointer `y` to the output vector of size n + depends); +} + +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +static int output_typeid_vector[td_ns::num_types][td_ns::num_types]; +static binary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types] + [td_ns::num_types]; + +MACRO_POPULATE_DISPATCH_TABLES(add); +} // namespace impl + +void init_add(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + impl::populate_dispatch_tables(); + using impl::contig_dispatch_vector; + using impl::output_typeid_vector; + + auto add_pyapi = [&](sycl::queue exec_q, arrayT src1, arrayT src2, + arrayT dst, const event_vecT &depends = {}) { + return py_int::py_binary_ufunc( + src1, src2, dst, exec_q, depends, output_typeid_vector, + contig_dispatch_vector, + // no support of strided implementation in OneMKL + td_ns::NullPtrTable{}, + // no support 
of C-contig row with broadcasting in OneMKL + td_ns::NullPtrTable< + impl:: + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + td_ns::NullPtrTable< + impl:: + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_add", add_pyapi, + "Call `add` function from OneMKL VM library to performs element " + "by element addition of vector `src1` by vector `src2` " + "to resulting vector `dst`", + py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("depends") = py::list()); + + auto add_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src1, + arrayT src2, arrayT dst) { + return vm_ext::need_to_call_binary_ufunc(exec_q, src1, src2, dst, + contig_dispatch_vector); + }; + m.def("_mkl_add_to_call", add_need_to_call_pyapi, + "Check input arguments to answer if `add` function from " + "OneMKL VM library can be used", + py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), + py::arg("dst")); +} + +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/add.hpp b/dpnp/backend/extensions/vm/add.hpp index 47ff60ed96a6..824fb649f2d0 100644 --- a/dpnp/backend/extensions/vm/add.hpp +++ b/dpnp/backend/extensions/vm/add.hpp @@ -25,58 +25,11 @@ #pragma once -#include +#include -#include "common.hpp" -#include "types_matrix.hpp" +namespace py = pybind11; -namespace dpnp +namespace dpnp::extensions::vm { -namespace backend -{ -namespace ext -{ -namespace vm -{ -template -sycl::event add_contig_impl(sycl::queue exec_q, - const std::int64_t n, - const char *in_a, - const char *in_b, - char *out_y, - const std::vector &depends) -{ - type_utils::validate_type_for_device(exec_q); - - const T *a = reinterpret_cast(in_a); - const T *b = reinterpret_cast(in_b); - using resTy = typename types::AddOutputType::value_type; - resTy *y = reinterpret_cast(out_y); - - return mkl_vm::add(exec_q, - n, // number of elements to be calculated - a, // pointer `a` containing 1st input vector of size n - b, // pointer `b` containing 2nd input vector of size n - y, // pointer `y` to the output vector of size n - depends); -} - -template -struct AddContigFactory -{ - fnT get() - { - if constexpr (std::is_same_v< - typename types::AddOutputType::value_type, void>) - { - return nullptr; - } - else { - return add_contig_impl; - } - } -}; -} // namespace vm -} // namespace ext -} // namespace backend -} // namespace dpnp +void init_add(py::module_ m); +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/common.hpp b/dpnp/backend/extensions/vm/common.hpp index dedbb83a41f5..570c79b15122 100644 --- a/dpnp/backend/extensions/vm/common.hpp +++ b/dpnp/backend/extensions/vm/common.hpp @@ -435,23 +435,9 @@ bool need_to_call_binary_ufunc(sycl::queue exec_q, } /** - * @brief A factory with no support provided to the implementation. - * - * @tparam fnT Type of function pointer to dispatch the implementation - * @tparam T Type of input vector - */ -template -struct NoSupportFactory -{ - fnT get() - { - return nullptr; - } -}; - -/** - * @brief A macro used to define factories and a populating function to dispatch - * to a callback with proper OneMKL function within VM extension scope. + * @brief A macro used to define factories and a populating unary functions + * to dispatch to a callback with proper OneMKL function within VM extension + * scope. 
*/ #define MACRO_POPULATE_DISPATCH_VECTORS(__name__) \ template \ @@ -486,10 +472,47 @@ struct NoSupportFactory vm_ext::init_ufunc_dispatch_vector( \ contig_dispatch_vector); \ - /* no support of strided implementation in OneMKL */ \ - vm_ext::init_ufunc_dispatch_vector( \ - strided_dispatch_vector); \ + }; + +/** + * @brief A macro used to define factories and a populating binary functions + * to dispatch to a callback with proper OneMKL function within VM extension + * scope. + */ +#define MACRO_POPULATE_DISPATCH_TABLES(__name__) \ + template \ + struct ContigFactory \ + { \ + fnT get() \ + { \ + if constexpr (std::is_same_v< \ + typename OutputType::value_type, void>) \ + { \ + return nullptr; \ + } \ + else { \ + return __name__##_contig_impl; \ + } \ + } \ + }; \ + \ + template \ + struct TypeMapFactory \ + { \ + std::enable_if_t::value, int> get() \ + { \ + using rT = typename OutputType::value_type; \ + return td_ns::GetTypeid{}.get(); \ + } \ + }; \ + \ + static void populate_dispatch_tables(void) \ + { \ + vm_ext::init_ufunc_dispatch_table( \ + output_typeid_vector); \ + vm_ext::init_ufunc_dispatch_table( \ + contig_dispatch_vector); \ }; template void init_ufunc_dispatch_vector(dispatchT dispatch_vector[]) { - dpctl_td_ns::DispatchVectorBuilder contig; - contig.populate_dispatch_vector(dispatch_vector); + dpctl_td_ns::DispatchVectorBuilder dvb; + dvb.populate_dispatch_vector(dispatch_vector); +} + +template + typename factoryT, + int _num_types = dpctl_td_ns::num_types> +void init_ufunc_dispatch_table(dispatchT dispatch_table[][_num_types]) +{ + dpctl_td_ns::DispatchTableBuilder dtb; + dtb.populate_dispatch_table(dispatch_table); } } // namespace dpnp::backend::ext::vm diff --git a/dpnp/backend/extensions/vm/types_matrix.hpp b/dpnp/backend/extensions/vm/types_matrix.hpp index e31c3e92f915..3e7ac9d90d8b 100644 --- a/dpnp/backend/extensions/vm/types_matrix.hpp +++ b/dpnp/backend/extensions/vm/types_matrix.hpp @@ -43,31 +43,6 @@ namespace vm { namespace types { -/** - * @brief A factory to define pairs of supported types for which - * MKL VM library provides support in oneapi::mkl::vm::add function. - * - * @tparam T Type of input vectors `a` and `b` and of result vector `y`. - */ -template -struct AddOutputType -{ - using value_type = typename std::disjunction< - dpctl_td_ns::BinaryTypeMapResultEntry, - T, - std::complex, - std::complex>, - dpctl_td_ns::BinaryTypeMapResultEntry, - T, - std::complex, - std::complex>, - dpctl_td_ns::BinaryTypeMapResultEntry, - dpctl_td_ns::BinaryTypeMapResultEntry, - dpctl_td_ns::DefaultResultEntry>::result_type; -}; - /** * @brief A factory to define pairs of supported types for which * MKL VM library provides support in oneapi::mkl::vm::asin function. 
diff --git a/dpnp/backend/extensions/vm/vm_py.cpp b/dpnp/backend/extensions/vm/vm_py.cpp index 7de367c14a90..46400e3a06eb 100644 --- a/dpnp/backend/extensions/vm/vm_py.cpp +++ b/dpnp/backend/extensions/vm/vm_py.cpp @@ -75,7 +75,6 @@ namespace vm_ns = dpnp::extensions::vm; using vm_ext::binary_impl_fn_ptr_t; using vm_ext::unary_impl_fn_ptr_t; -static binary_impl_fn_ptr_t add_dispatch_vector[dpctl_td_ns::num_types]; static unary_impl_fn_ptr_t asin_dispatch_vector[dpctl_td_ns::num_types]; static unary_impl_fn_ptr_t asinh_dispatch_vector[dpctl_td_ns::num_types]; static unary_impl_fn_ptr_t atan_dispatch_vector[dpctl_td_ns::num_types]; @@ -116,36 +115,7 @@ PYBIND11_MODULE(_vm_impl, m) vm_ns::init_abs(m); vm_ns::init_acos(m); vm_ns::init_acosh(m); - - // BinaryUfunc: ==== Add(x1, x2) ==== - { - vm_ext::init_ufunc_dispatch_vector( - add_dispatch_vector); - - auto add_pyapi = [&](sycl::queue exec_q, arrayT src1, arrayT src2, - arrayT dst, const event_vecT &depends = {}) { - return vm_ext::binary_ufunc(exec_q, src1, src2, dst, depends, - add_dispatch_vector); - }; - m.def("_add", add_pyapi, - "Call `add` function from OneMKL VM library to performs element " - "by element addition of vector `src1` by vector `src2` " - "to resulting vector `dst`", - py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("depends") = py::list()); - - auto add_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src1, - arrayT src2, arrayT dst) { - return vm_ext::need_to_call_binary_ufunc(exec_q, src1, src2, dst, - add_dispatch_vector); - }; - m.def("_mkl_add_to_call", add_need_to_call_pyapi, - "Check input arguments to answer if `add` function from " - "OneMKL VM library can be used", - py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), - py::arg("dst")); - } + vm_ns::init_add(m); // UnaryUfunc: ==== Asin(x) ==== { From 1d16fc37f19f1d8de5976fbb08e6738ccd7fbff0 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 5 Jun 2024 16:52:47 +0200 Subject: [PATCH 10/35] Separated implementation of asin function --- dpnp/backend/extensions/vm/CMakeLists.txt | 1 + dpnp/backend/extensions/vm/abs.cpp | 1 - dpnp/backend/extensions/vm/acos.cpp | 1 - dpnp/backend/extensions/vm/acosh.cpp | 1 - dpnp/backend/extensions/vm/add.cpp | 1 - dpnp/backend/extensions/vm/asin.cpp | 139 ++++++++++++++++++++ dpnp/backend/extensions/vm/asin.hpp | 54 +------- dpnp/backend/extensions/vm/types_matrix.hpp | 17 --- dpnp/backend/extensions/vm/vm_py.cpp | 30 +---- 9 files changed, 146 insertions(+), 99 deletions(-) create mode 100644 dpnp/backend/extensions/vm/asin.cpp diff --git a/dpnp/backend/extensions/vm/CMakeLists.txt b/dpnp/backend/extensions/vm/CMakeLists.txt index ca0b46c5909c..ffedda092b41 100644 --- a/dpnp/backend/extensions/vm/CMakeLists.txt +++ b/dpnp/backend/extensions/vm/CMakeLists.txt @@ -28,6 +28,7 @@ set(_elementwise_sources ${CMAKE_CURRENT_SOURCE_DIR}/acos.cpp ${CMAKE_CURRENT_SOURCE_DIR}/acosh.cpp ${CMAKE_CURRENT_SOURCE_DIR}/add.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/asin.cpp ) set(_module_src diff --git a/dpnp/backend/extensions/vm/abs.cpp b/dpnp/backend/extensions/vm/abs.cpp index dc590add1ee4..ac41e48989d9 100644 --- a/dpnp/backend/extensions/vm/abs.cpp +++ b/dpnp/backend/extensions/vm/abs.cpp @@ -136,5 +136,4 @@ void init_abs(py::module_ m) "OneMKL VM library can be used", py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); } - } // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/acos.cpp b/dpnp/backend/extensions/vm/acos.cpp index 0bb6653abbc8..3a33e570e79f 100644 --- 
a/dpnp/backend/extensions/vm/acos.cpp +++ b/dpnp/backend/extensions/vm/acos.cpp @@ -136,5 +136,4 @@ void init_acos(py::module_ m) "OneMKL VM library can be used", py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); } - } // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/acosh.cpp b/dpnp/backend/extensions/vm/acosh.cpp index cc9274d7b706..3b927031f503 100644 --- a/dpnp/backend/extensions/vm/acosh.cpp +++ b/dpnp/backend/extensions/vm/acosh.cpp @@ -136,5 +136,4 @@ void init_acosh(py::module_ m) "OneMKL VM library can be used", py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); } - } // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/add.cpp b/dpnp/backend/extensions/vm/add.cpp index 195a17c4014d..9f6ab16eba8d 100644 --- a/dpnp/backend/extensions/vm/add.cpp +++ b/dpnp/backend/extensions/vm/add.cpp @@ -167,5 +167,4 @@ void init_add(py::module_ m) py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), py::arg("dst")); } - } // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/asin.cpp b/dpnp/backend/extensions/vm/asin.cpp new file mode 100644 index 000000000000..8f2bfea867b4 --- /dev/null +++ b/dpnp/backend/extensions/vm/asin.cpp @@ -0,0 +1,139 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#include +#include + +#include "dpctl4pybind11.hpp" + +#include "asin.hpp" +#include "common.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::vm +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; +namespace vm_ext = dpnp::backend::ext::vm; + +namespace impl +{ +// OneMKL namespace with VM functions +namespace mkl_vm = oneapi::mkl::vm; + +/** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::asin function. + * + * @tparam T Type of input vector `a` and of result vector `y`. + */ +template +struct OutputType +{ + using value_type = typename std::disjunction< + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +static sycl::event asin_contig_impl(sycl::queue &exec_q, + std::size_t in_n, + const char *in_a, + char *out_y, + const std::vector &depends) +{ + tu_ns::validate_type_for_device(exec_q); + + std::int64_t n = static_cast(in_n); + const T *a = reinterpret_cast(in_a); + + using resTy = typename OutputType::value_type; + resTy *y = reinterpret_cast(out_y); + + return mkl_vm::asin(exec_q, + n, // number of elements to be calculated + a, // pointer `a` containing input vector of size n + y, // pointer `y` to the output vector of size n + depends); +} + +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +static int output_typeid_vector[td_ns::num_types]; +static unary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types]; + +MACRO_POPULATE_DISPATCH_VECTORS(asin); +} // namespace impl + +void init_asin(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + impl::populate_dispatch_vectors(); + using impl::contig_dispatch_vector; + using impl::output_typeid_vector; + + auto asin_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, + const event_vecT &depends = {}) { + return py_int::py_unary_ufunc( + src, dst, exec_q, depends, output_typeid_vector, + contig_dispatch_vector, + // no support of strided implementation in OneMKL + td_ns::NullPtrVector{}); + }; + m.def("_asin", asin_pyapi, + "Call `asin` function from OneMKL VM library to compute " + "inverse sine of vector elements", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), + py::arg("depends") = py::list()); + + auto asin_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, + arrayT dst) { + return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, + contig_dispatch_vector); + }; + m.def("_mkl_asin_to_call", asin_need_to_call_pyapi, + "Check input arguments to answer if `asin` function from " + "OneMKL VM library can be used", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); +} +} // namespace dpnp::extensions::vm diff --git 
a/dpnp/backend/extensions/vm/asin.hpp b/dpnp/backend/extensions/vm/asin.hpp index 5e44aa5bde68..a37bff38fbc7 100644 --- a/dpnp/backend/extensions/vm/asin.hpp +++ b/dpnp/backend/extensions/vm/asin.hpp @@ -25,55 +25,11 @@ #pragma once -#include +#include -#include "common.hpp" -#include "types_matrix.hpp" +namespace py = pybind11; -namespace dpnp +namespace dpnp::extensions::vm { -namespace backend -{ -namespace ext -{ -namespace vm -{ -template -sycl::event asin_contig_impl(sycl::queue exec_q, - const std::int64_t n, - const char *in_a, - char *out_y, - const std::vector &depends) -{ - type_utils::validate_type_for_device(exec_q); - - const T *a = reinterpret_cast(in_a); - using resTy = typename types::AsinOutputType::value_type; - resTy *y = reinterpret_cast(out_y); - - return mkl_vm::asin(exec_q, - n, // number of elements to be calculated - a, // pointer `a` containing input vector of size n - y, // pointer `y` to the output vector of size n - depends); -} - -template -struct AsinContigFactory -{ - fnT get() - { - if constexpr (std::is_same_v< - typename types::AsinOutputType::value_type, void>) - { - return nullptr; - } - else { - return asin_contig_impl; - } - } -}; -} // namespace vm -} // namespace ext -} // namespace backend -} // namespace dpnp +void init_asin(py::module_ m); +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/types_matrix.hpp b/dpnp/backend/extensions/vm/types_matrix.hpp index 3e7ac9d90d8b..d4aeb837099e 100644 --- a/dpnp/backend/extensions/vm/types_matrix.hpp +++ b/dpnp/backend/extensions/vm/types_matrix.hpp @@ -43,23 +43,6 @@ namespace vm { namespace types { -/** - * @brief A factory to define pairs of supported types for which - * MKL VM library provides support in oneapi::mkl::vm::asin function. - * - * @tparam T Type of input vector `a` and of result vector `y`. - */ -template -struct AsinOutputType -{ - using value_type = typename std::disjunction< - dpctl_td_ns::TypeMapResultEntry>, - dpctl_td_ns::TypeMapResultEntry>, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::DefaultResultEntry>::result_type; -}; - /** * @brief A factory to define pairs of supported types for which * MKL VM library provides support in oneapi::mkl::vm::asinh function. 
diff --git a/dpnp/backend/extensions/vm/vm_py.cpp b/dpnp/backend/extensions/vm/vm_py.cpp index 46400e3a06eb..7faf802e94ee 100644 --- a/dpnp/backend/extensions/vm/vm_py.cpp +++ b/dpnp/backend/extensions/vm/vm_py.cpp @@ -75,7 +75,6 @@ namespace vm_ns = dpnp::extensions::vm; using vm_ext::binary_impl_fn_ptr_t; using vm_ext::unary_impl_fn_ptr_t; -static unary_impl_fn_ptr_t asin_dispatch_vector[dpctl_td_ns::num_types]; static unary_impl_fn_ptr_t asinh_dispatch_vector[dpctl_td_ns::num_types]; static unary_impl_fn_ptr_t atan_dispatch_vector[dpctl_td_ns::num_types]; static binary_impl_fn_ptr_t atan2_dispatch_vector[dpctl_td_ns::num_types]; @@ -116,34 +115,7 @@ PYBIND11_MODULE(_vm_impl, m) vm_ns::init_acos(m); vm_ns::init_acosh(m); vm_ns::init_add(m); - - // UnaryUfunc: ==== Asin(x) ==== - { - vm_ext::init_ufunc_dispatch_vector( - asin_dispatch_vector); - - auto asin_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { - return vm_ext::unary_ufunc(exec_q, src, dst, depends, - asin_dispatch_vector); - }; - m.def("_asin", asin_pyapi, - "Call `asin` function from OneMKL VM library to compute " - "inverse sine of vector elements", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), - py::arg("depends") = py::list()); - - auto asin_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { - return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, - asin_dispatch_vector); - }; - m.def("_mkl_asin_to_call", asin_need_to_call_pyapi, - "Check input arguments to answer if `asin` function from " - "OneMKL VM library can be used", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); - } + vm_ns::init_asin(m); // UnaryUfunc: ==== Asinh(x) ==== { From d2f31e599500f7d7ffb72af4457ec17bab854801 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 5 Jun 2024 16:57:57 +0200 Subject: [PATCH 11/35] Separated implementation of asinh function --- dpnp/backend/extensions/vm/CMakeLists.txt | 1 + dpnp/backend/extensions/vm/asinh.cpp | 139 ++++++++++++++++++++ dpnp/backend/extensions/vm/asinh.hpp | 54 +------- dpnp/backend/extensions/vm/types_matrix.hpp | 17 --- dpnp/backend/extensions/vm/vm_py.cpp | 30 +---- 5 files changed, 146 insertions(+), 95 deletions(-) create mode 100644 dpnp/backend/extensions/vm/asinh.cpp diff --git a/dpnp/backend/extensions/vm/CMakeLists.txt b/dpnp/backend/extensions/vm/CMakeLists.txt index ffedda092b41..adc62bf8fe56 100644 --- a/dpnp/backend/extensions/vm/CMakeLists.txt +++ b/dpnp/backend/extensions/vm/CMakeLists.txt @@ -29,6 +29,7 @@ set(_elementwise_sources ${CMAKE_CURRENT_SOURCE_DIR}/acosh.cpp ${CMAKE_CURRENT_SOURCE_DIR}/add.cpp ${CMAKE_CURRENT_SOURCE_DIR}/asin.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/asinh.cpp ) set(_module_src diff --git a/dpnp/backend/extensions/vm/asinh.cpp b/dpnp/backend/extensions/vm/asinh.cpp new file mode 100644 index 000000000000..e0b425cccc43 --- /dev/null +++ b/dpnp/backend/extensions/vm/asinh.cpp @@ -0,0 +1,139 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include +#include + +#include "dpctl4pybind11.hpp" + +#include "asinh.hpp" +#include "common.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::vm +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; +namespace vm_ext = dpnp::backend::ext::vm; + +namespace impl +{ +// OneMKL namespace with VM functions +namespace mkl_vm = oneapi::mkl::vm; + +/** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::asinh function. + * + * @tparam T Type of input vector `a` and of result vector `y`. 
+ */ +template +struct OutputType +{ + using value_type = typename std::disjunction< + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +static sycl::event asinh_contig_impl(sycl::queue &exec_q, + std::size_t in_n, + const char *in_a, + char *out_y, + const std::vector &depends) +{ + tu_ns::validate_type_for_device(exec_q); + + std::int64_t n = static_cast(in_n); + const T *a = reinterpret_cast(in_a); + + using resTy = typename OutputType::value_type; + resTy *y = reinterpret_cast(out_y); + + return mkl_vm::asinh(exec_q, + n, // number of elements to be calculated + a, // pointer `a` containing input vector of size n + y, // pointer `y` to the output vector of size n + depends); +} + +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +static int output_typeid_vector[td_ns::num_types]; +static unary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types]; + +MACRO_POPULATE_DISPATCH_VECTORS(asinh); +} // namespace impl + +void init_asinh(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + impl::populate_dispatch_vectors(); + using impl::contig_dispatch_vector; + using impl::output_typeid_vector; + + auto asinh_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, + const event_vecT &depends = {}) { + return py_int::py_unary_ufunc( + src, dst, exec_q, depends, output_typeid_vector, + contig_dispatch_vector, + // no support of strided implementation in OneMKL + td_ns::NullPtrVector{}); + }; + m.def("_asinh", asinh_pyapi, + "Call `asinh` function from OneMKL VM library to compute " + "inverse cosine of vector elements", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), + py::arg("depends") = py::list()); + + auto asinh_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, + arrayT dst) { + return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, + contig_dispatch_vector); + }; + m.def("_mkl_asinh_to_call", asinh_need_to_call_pyapi, + "Check input arguments to answer if `asinh` function from " + "OneMKL VM library can be used", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); +} +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/asinh.hpp b/dpnp/backend/extensions/vm/asinh.hpp index 58e2815e3f7f..ad40f0d4efb4 100644 --- a/dpnp/backend/extensions/vm/asinh.hpp +++ b/dpnp/backend/extensions/vm/asinh.hpp @@ -25,55 +25,11 @@ #pragma once -#include +#include -#include "common.hpp" -#include "types_matrix.hpp" +namespace py = pybind11; -namespace dpnp +namespace dpnp::extensions::vm { -namespace backend -{ -namespace ext -{ -namespace vm -{ -template -sycl::event asinh_contig_impl(sycl::queue exec_q, - const std::int64_t n, - const char *in_a, - char *out_y, - const std::vector &depends) -{ - type_utils::validate_type_for_device(exec_q); - - const T *a = reinterpret_cast(in_a); - using resTy = typename types::AsinhOutputType::value_type; - resTy *y = reinterpret_cast(out_y); - - return mkl_vm::asinh(exec_q, - n, // number of elements to be calculated - a, // pointer `a` containing input vector of size n - y, // pointer `y` to the output vector of size n - depends); -} - -template -struct AsinhContigFactory -{ - fnT get() - { - if constexpr (std::is_same_v< - typename types::AsinhOutputType::value_type, void>) - { - return nullptr; - } - else { - return asinh_contig_impl; - } - } -}; -} // namespace vm -} // namespace ext -} // 
namespace backend -} // namespace dpnp +void init_asinh(py::module_ m); +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/types_matrix.hpp b/dpnp/backend/extensions/vm/types_matrix.hpp index d4aeb837099e..d1134b2644c3 100644 --- a/dpnp/backend/extensions/vm/types_matrix.hpp +++ b/dpnp/backend/extensions/vm/types_matrix.hpp @@ -43,23 +43,6 @@ namespace vm { namespace types { -/** - * @brief A factory to define pairs of supported types for which - * MKL VM library provides support in oneapi::mkl::vm::asinh function. - * - * @tparam T Type of input vector `a` and of result vector `y`. - */ -template -struct AsinhOutputType -{ - using value_type = typename std::disjunction< - dpctl_td_ns::TypeMapResultEntry>, - dpctl_td_ns::TypeMapResultEntry>, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::DefaultResultEntry>::result_type; -}; - /** * @brief A factory to define pairs of supported types for which * MKL VM library provides support in oneapi::mkl::vm::atan function. diff --git a/dpnp/backend/extensions/vm/vm_py.cpp b/dpnp/backend/extensions/vm/vm_py.cpp index 7faf802e94ee..b1023736350c 100644 --- a/dpnp/backend/extensions/vm/vm_py.cpp +++ b/dpnp/backend/extensions/vm/vm_py.cpp @@ -75,7 +75,6 @@ namespace vm_ns = dpnp::extensions::vm; using vm_ext::binary_impl_fn_ptr_t; using vm_ext::unary_impl_fn_ptr_t; -static unary_impl_fn_ptr_t asinh_dispatch_vector[dpctl_td_ns::num_types]; static unary_impl_fn_ptr_t atan_dispatch_vector[dpctl_td_ns::num_types]; static binary_impl_fn_ptr_t atan2_dispatch_vector[dpctl_td_ns::num_types]; static unary_impl_fn_ptr_t atanh_dispatch_vector[dpctl_td_ns::num_types]; @@ -116,34 +115,7 @@ PYBIND11_MODULE(_vm_impl, m) vm_ns::init_acosh(m); vm_ns::init_add(m); vm_ns::init_asin(m); - - // UnaryUfunc: ==== Asinh(x) ==== - { - vm_ext::init_ufunc_dispatch_vector( - asinh_dispatch_vector); - - auto asinh_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { - return vm_ext::unary_ufunc(exec_q, src, dst, depends, - asinh_dispatch_vector); - }; - m.def("_asinh", asinh_pyapi, - "Call `asinh` function from OneMKL VM library to compute " - "inverse cosine of vector elements", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), - py::arg("depends") = py::list()); - - auto asinh_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { - return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, - asinh_dispatch_vector); - }; - m.def("_mkl_asinh_to_call", asinh_need_to_call_pyapi, - "Check input arguments to answer if `asinh` function from " - "OneMKL VM library can be used", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); - } + vm_ns::init_asinh(m); // UnaryUfunc: ==== Atan(x) ==== { From d63fcff906637ceef63315d56471bec785a42de2 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 5 Jun 2024 17:06:14 +0200 Subject: [PATCH 12/35] Separated implementation of atan, atan2, atanh functions --- dpnp/backend/extensions/vm/CMakeLists.txt | 3 + dpnp/backend/extensions/vm/acosh.cpp | 2 +- dpnp/backend/extensions/vm/add.cpp | 2 +- dpnp/backend/extensions/vm/asinh.cpp | 2 +- dpnp/backend/extensions/vm/atan.cpp | 139 +++++++++++++++++ dpnp/backend/extensions/vm/atan.hpp | 54 +------ dpnp/backend/extensions/vm/atan2.cpp | 159 ++++++++++++++++++++ dpnp/backend/extensions/vm/atan2.hpp | 57 +------ dpnp/backend/extensions/vm/atanh.cpp | 139 +++++++++++++++++ dpnp/backend/extensions/vm/atanh.hpp | 54 +------ dpnp/backend/extensions/vm/types_matrix.hpp | 32 ---- 
dpnp/backend/extensions/vm/vm_py.cpp | 91 +---------- 12 files changed, 461 insertions(+), 273 deletions(-) create mode 100644 dpnp/backend/extensions/vm/atan.cpp create mode 100644 dpnp/backend/extensions/vm/atan2.cpp create mode 100644 dpnp/backend/extensions/vm/atanh.cpp diff --git a/dpnp/backend/extensions/vm/CMakeLists.txt b/dpnp/backend/extensions/vm/CMakeLists.txt index adc62bf8fe56..02b6f9125e4e 100644 --- a/dpnp/backend/extensions/vm/CMakeLists.txt +++ b/dpnp/backend/extensions/vm/CMakeLists.txt @@ -30,6 +30,9 @@ set(_elementwise_sources ${CMAKE_CURRENT_SOURCE_DIR}/add.cpp ${CMAKE_CURRENT_SOURCE_DIR}/asin.cpp ${CMAKE_CURRENT_SOURCE_DIR}/asinh.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/atan.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/atan2.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/atanh.cpp ) set(_module_src diff --git a/dpnp/backend/extensions/vm/acosh.cpp b/dpnp/backend/extensions/vm/acosh.cpp index 3b927031f503..a3e1a01fe808 100644 --- a/dpnp/backend/extensions/vm/acosh.cpp +++ b/dpnp/backend/extensions/vm/acosh.cpp @@ -122,7 +122,7 @@ void init_acosh(py::module_ m) }; m.def("_acosh", acosh_pyapi, "Call `acosh` function from OneMKL VM library to compute " - "inverse cosine of vector elements", + "inverse hyperbolic cosine of vector elements", py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), py::arg("depends") = py::list()); diff --git a/dpnp/backend/extensions/vm/add.cpp b/dpnp/backend/extensions/vm/add.cpp index 9f6ab16eba8d..627c87f8afbc 100644 --- a/dpnp/backend/extensions/vm/add.cpp +++ b/dpnp/backend/extensions/vm/add.cpp @@ -59,7 +59,7 @@ namespace mkl_vm = oneapi::mkl::vm; * @brief A factory to define pairs of supported types for which * MKL VM library provides support in oneapi::mkl::vm::add function. * - * @tparam T Type of input vector `a` and of result vector `y`. + * @tparam T Type of input vectors `a` and `b` and of result vector `y`. */ template struct OutputType diff --git a/dpnp/backend/extensions/vm/asinh.cpp b/dpnp/backend/extensions/vm/asinh.cpp index e0b425cccc43..2d72dde6ccce 100644 --- a/dpnp/backend/extensions/vm/asinh.cpp +++ b/dpnp/backend/extensions/vm/asinh.cpp @@ -122,7 +122,7 @@ void init_asinh(py::module_ m) }; m.def("_asinh", asinh_pyapi, "Call `asinh` function from OneMKL VM library to compute " - "inverse cosine of vector elements", + "inverse hyperbolic sine of vector elements", py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), py::arg("depends") = py::list()); diff --git a/dpnp/backend/extensions/vm/atan.cpp b/dpnp/backend/extensions/vm/atan.cpp new file mode 100644 index 000000000000..9ce1b59aa145 --- /dev/null +++ b/dpnp/backend/extensions/vm/atan.cpp @@ -0,0 +1,139 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include +#include + +#include "dpctl4pybind11.hpp" + +#include "atan.hpp" +#include "common.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::vm +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; +namespace vm_ext = dpnp::backend::ext::vm; + +namespace impl +{ +// OneMKL namespace with VM functions +namespace mkl_vm = oneapi::mkl::vm; + +/** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::atan function. + * + * @tparam T Type of input vector `a` and of result vector `y`. 
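+ *
+ * Note: std::disjunction selects the first matching TypeMapResultEntry, so
+ * complex and real floating-point inputs map to themselves, while any other
+ * type falls through to DefaultResultEntry and resolves to `void`, meaning
+ * no MKL VM implementation is registered for it.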
+ */ +template +struct OutputType +{ + using value_type = typename std::disjunction< + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +static sycl::event atan_contig_impl(sycl::queue &exec_q, + std::size_t in_n, + const char *in_a, + char *out_y, + const std::vector &depends) +{ + tu_ns::validate_type_for_device(exec_q); + + std::int64_t n = static_cast(in_n); + const T *a = reinterpret_cast(in_a); + + using resTy = typename OutputType::value_type; + resTy *y = reinterpret_cast(out_y); + + return mkl_vm::atan(exec_q, + n, // number of elements to be calculated + a, // pointer `a` containing input vector of size n + y, // pointer `y` to the output vector of size n + depends); +} + +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +static int output_typeid_vector[td_ns::num_types]; +static unary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types]; + +MACRO_POPULATE_DISPATCH_VECTORS(atan); +} // namespace impl + +void init_atan(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + impl::populate_dispatch_vectors(); + using impl::contig_dispatch_vector; + using impl::output_typeid_vector; + + auto atan_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, + const event_vecT &depends = {}) { + return py_int::py_unary_ufunc( + src, dst, exec_q, depends, output_typeid_vector, + contig_dispatch_vector, + // no support of strided implementation in OneMKL + td_ns::NullPtrVector{}); + }; + m.def("_atan", atan_pyapi, + "Call `atan` function from OneMKL VM library to compute " + "inverse tangent of vector elements", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), + py::arg("depends") = py::list()); + + auto atan_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, + arrayT dst) { + return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, + contig_dispatch_vector); + }; + m.def("_mkl_atan_to_call", atan_need_to_call_pyapi, + "Check input arguments to answer if `atan` function from " + "OneMKL VM library can be used", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); +} +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/atan.hpp b/dpnp/backend/extensions/vm/atan.hpp index b36abc161383..90547e92c8d9 100644 --- a/dpnp/backend/extensions/vm/atan.hpp +++ b/dpnp/backend/extensions/vm/atan.hpp @@ -25,55 +25,11 @@ #pragma once -#include +#include -#include "common.hpp" -#include "types_matrix.hpp" +namespace py = pybind11; -namespace dpnp +namespace dpnp::extensions::vm { -namespace backend -{ -namespace ext -{ -namespace vm -{ -template -sycl::event atan_contig_impl(sycl::queue exec_q, - const std::int64_t n, - const char *in_a, - char *out_y, - const std::vector &depends) -{ - type_utils::validate_type_for_device(exec_q); - - const T *a = reinterpret_cast(in_a); - using resTy = typename types::AtanOutputType::value_type; - resTy *y = reinterpret_cast(out_y); - - return mkl_vm::atan(exec_q, - n, // number of elements to be calculated - a, // pointer `a` containing input vector of size n - y, // pointer `y` to the output vector of size n - depends); -} - -template -struct AtanContigFactory -{ - fnT get() - { - if constexpr (std::is_same_v< - typename types::AtanOutputType::value_type, void>) - { - return nullptr; - } - else { - return atan_contig_impl; - } - } -}; -} // namespace vm -} // namespace ext -} // namespace backend -} // 
namespace dpnp +void init_atan(py::module_ m); +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/atan2.cpp b/dpnp/backend/extensions/vm/atan2.cpp new file mode 100644 index 000000000000..593edf4a773a --- /dev/null +++ b/dpnp/backend/extensions/vm/atan2.cpp @@ -0,0 +1,159 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include +#include + +#include "dpctl4pybind11.hpp" + +#include "atan2.hpp" +#include "common.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::vm +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; +namespace vm_ext = dpnp::backend::ext::vm; + +namespace impl +{ +// OneMKL namespace with VM functions +namespace mkl_vm = oneapi::mkl::vm; + +/** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::atan2 function. + * + * @tparam T Type of input vectors `a` and `b` and of result vector `y`. 
+ */ +template +struct OutputType +{ + using value_type = typename std::disjunction< + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +static sycl::event atan2_contig_impl(sycl::queue &exec_q, + std::size_t in_n, + const char *in_a, + ssize_t a_offset, + const char *in_b, + ssize_t b_offset, + char *out_y, + ssize_t out_offset, + const std::vector &depends) +{ + tu_ns::validate_type_for_device(exec_q); + tu_ns::validate_type_for_device(exec_q); + + if ((a_offset != 0) || (b_offset != 0) || (out_offset != 0)) { + throw std::runtime_error("Arrays offsets have to be equals to 0"); + } + + std::int64_t n = static_cast(in_n); + const T1 *a = reinterpret_cast(in_a); + const T2 *b = reinterpret_cast(in_b); + + using resTy = typename OutputType::value_type; + resTy *y = reinterpret_cast(out_y); + + return mkl_vm::atan2(exec_q, + n, // number of elements to be calculated + a, // pointer `a` containing 1st input vector of size n + b, // pointer `b` containing 2nd input vector of size n + y, // pointer `y` to the output vector of size n + depends); +} + +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +static int output_typeid_vector[td_ns::num_types][td_ns::num_types]; +static binary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types] + [td_ns::num_types]; + +MACRO_POPULATE_DISPATCH_TABLES(atan2); +} // namespace impl + +void init_atan2(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + impl::populate_dispatch_tables(); + using impl::contig_dispatch_vector; + using impl::output_typeid_vector; + + auto atan2_pyapi = [&](sycl::queue exec_q, arrayT src1, arrayT src2, + arrayT dst, const event_vecT &depends = {}) { + return py_int::py_binary_ufunc( + src1, src2, dst, exec_q, depends, output_typeid_vector, + contig_dispatch_vector, + // no support of strided implementation in OneMKL + td_ns::NullPtrTable{}, + // no support of C-contig row with broadcasting in OneMKL + td_ns::NullPtrTable< + impl:: + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + td_ns::NullPtrTable< + impl:: + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_atan2", atan2_pyapi, + "Call `atan2` function from OneMKL VM library to compute element " + "by element inverse tangent of `x1/x2`", + py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("depends") = py::list()); + + auto atan2_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src1, + arrayT src2, arrayT dst) { + return vm_ext::need_to_call_binary_ufunc(exec_q, src1, src2, dst, + contig_dispatch_vector); + }; + m.def("_mkl_atan2_to_call", atan2_need_to_call_pyapi, + "Check input arguments to answer if `atan2` function from " + "OneMKL VM library can be used", + py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), + py::arg("dst")); +} +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/atan2.hpp b/dpnp/backend/extensions/vm/atan2.hpp index 19a66e877ac4..cd0e259914c8 100644 --- a/dpnp/backend/extensions/vm/atan2.hpp +++ b/dpnp/backend/extensions/vm/atan2.hpp @@ -25,58 +25,11 @@ #pragma once -#include +#include -#include "common.hpp" -#include "types_matrix.hpp" +namespace py = pybind11; -namespace dpnp +namespace dpnp::extensions::vm { -namespace 
backend -{ -namespace ext -{ -namespace vm -{ -template -sycl::event atan2_contig_impl(sycl::queue exec_q, - const std::int64_t n, - const char *in_a, - const char *in_b, - char *out_y, - const std::vector &depends) -{ - type_utils::validate_type_for_device(exec_q); - - const T *a = reinterpret_cast(in_a); - const T *b = reinterpret_cast(in_b); - using resTy = typename types::Atan2OutputType::value_type; - resTy *y = reinterpret_cast(out_y); - - return mkl_vm::atan2(exec_q, - n, // number of elements to be calculated - a, // pointer `a` containing 1st input vector of size n - b, // pointer `b` containing 2nd input vector of size n - y, // pointer `y` to the output vector of size n - depends); -} - -template -struct Atan2ContigFactory -{ - fnT get() - { - if constexpr (std::is_same_v< - typename types::Atan2OutputType::value_type, void>) - { - return nullptr; - } - else { - return atan2_contig_impl; - } - } -}; -} // namespace vm -} // namespace ext -} // namespace backend -} // namespace dpnp +void init_atan2(py::module_ m); +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/atanh.cpp b/dpnp/backend/extensions/vm/atanh.cpp new file mode 100644 index 000000000000..d1d44a7a2f63 --- /dev/null +++ b/dpnp/backend/extensions/vm/atanh.cpp @@ -0,0 +1,139 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#include +#include + +#include "dpctl4pybind11.hpp" + +#include "atanh.hpp" +#include "common.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::vm +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; +namespace vm_ext = dpnp::backend::ext::vm; + +namespace impl +{ +// OneMKL namespace with VM functions +namespace mkl_vm = oneapi::mkl::vm; + +/** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::atanh function. + * + * @tparam T Type of input vector `a` and of result vector `y`. + */ +template +struct OutputType +{ + using value_type = typename std::disjunction< + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +static sycl::event atanh_contig_impl(sycl::queue &exec_q, + std::size_t in_n, + const char *in_a, + char *out_y, + const std::vector &depends) +{ + tu_ns::validate_type_for_device(exec_q); + + std::int64_t n = static_cast(in_n); + const T *a = reinterpret_cast(in_a); + + using resTy = typename OutputType::value_type; + resTy *y = reinterpret_cast(out_y); + + return mkl_vm::atanh(exec_q, + n, // number of elements to be calculated + a, // pointer `a` containing input vector of size n + y, // pointer `y` to the output vector of size n + depends); +} + +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +static int output_typeid_vector[td_ns::num_types]; +static unary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types]; + +MACRO_POPULATE_DISPATCH_VECTORS(atanh); +} // namespace impl + +void init_atanh(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + impl::populate_dispatch_vectors(); + using impl::contig_dispatch_vector; + using impl::output_typeid_vector; + + auto atanh_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, + const event_vecT &depends = {}) { + return py_int::py_unary_ufunc( + src, dst, exec_q, depends, output_typeid_vector, + contig_dispatch_vector, + // no support of strided implementation in OneMKL + td_ns::NullPtrVector{}); + }; + m.def("_atanh", atanh_pyapi, + "Call `atanh` function from OneMKL VM library to compute " + "inverse hyperbolic tangent of vector elements", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), + py::arg("depends") = py::list()); + + auto atanh_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, + arrayT dst) { + return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, + contig_dispatch_vector); + }; + m.def("_mkl_atanh_to_call", atanh_need_to_call_pyapi, + "Check input arguments to answer if `atanh` function from " + "OneMKL VM library can be used", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); +} +} // namespace dpnp::extensions::vm diff 
--git a/dpnp/backend/extensions/vm/atanh.hpp b/dpnp/backend/extensions/vm/atanh.hpp index 9764df84ce31..afe404adf9bd 100644 --- a/dpnp/backend/extensions/vm/atanh.hpp +++ b/dpnp/backend/extensions/vm/atanh.hpp @@ -25,55 +25,11 @@ #pragma once -#include +#include -#include "common.hpp" -#include "types_matrix.hpp" +namespace py = pybind11; -namespace dpnp +namespace dpnp::extensions::vm { -namespace backend -{ -namespace ext -{ -namespace vm -{ -template -sycl::event atanh_contig_impl(sycl::queue exec_q, - const std::int64_t n, - const char *in_a, - char *out_y, - const std::vector &depends) -{ - type_utils::validate_type_for_device(exec_q); - - const T *a = reinterpret_cast(in_a); - using resTy = typename types::AtanhOutputType::value_type; - resTy *y = reinterpret_cast(out_y); - - return mkl_vm::atanh(exec_q, - n, // number of elements to be calculated - a, // pointer `a` containing input vector of size n - y, // pointer `y` to the output vector of size n - depends); -} - -template -struct AtanhContigFactory -{ - fnT get() - { - if constexpr (std::is_same_v< - typename types::AtanhOutputType::value_type, void>) - { - return nullptr; - } - else { - return atanh_contig_impl; - } - } -}; -} // namespace vm -} // namespace ext -} // namespace backend -} // namespace dpnp +void init_atanh(py::module_ m); +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/types_matrix.hpp b/dpnp/backend/extensions/vm/types_matrix.hpp index d1134b2644c3..940c68565e7a 100644 --- a/dpnp/backend/extensions/vm/types_matrix.hpp +++ b/dpnp/backend/extensions/vm/types_matrix.hpp @@ -43,38 +43,6 @@ namespace vm { namespace types { -/** - * @brief A factory to define pairs of supported types for which - * MKL VM library provides support in oneapi::mkl::vm::atan function. - * - * @tparam T Type of input vector `a` and of result vector `y`. - */ -template -struct AtanOutputType -{ - using value_type = typename std::disjunction< - dpctl_td_ns::TypeMapResultEntry>, - dpctl_td_ns::TypeMapResultEntry>, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::DefaultResultEntry>::result_type; -}; - -/** - * @brief A factory to define pairs of supported types for which - * MKL VM library provides support in oneapi::mkl::vm::atan2 function. - * - * @tparam T Type of input vectors `a` and `b` and of result vector `y`. - */ -template -struct Atan2OutputType -{ - using value_type = typename std::disjunction< - dpctl_td_ns::BinaryTypeMapResultEntry, - dpctl_td_ns::BinaryTypeMapResultEntry, - dpctl_td_ns::DefaultResultEntry>::result_type; -}; - /** * @brief A factory to define pairs of supported types for which * MKL VM library provides support in oneapi::mkl::vm::atanh function. 
diff --git a/dpnp/backend/extensions/vm/vm_py.cpp b/dpnp/backend/extensions/vm/vm_py.cpp index b1023736350c..9b80a7bea46b 100644 --- a/dpnp/backend/extensions/vm/vm_py.cpp +++ b/dpnp/backend/extensions/vm/vm_py.cpp @@ -75,9 +75,6 @@ namespace vm_ns = dpnp::extensions::vm; using vm_ext::binary_impl_fn_ptr_t; using vm_ext::unary_impl_fn_ptr_t; -static unary_impl_fn_ptr_t atan_dispatch_vector[dpctl_td_ns::num_types]; -static binary_impl_fn_ptr_t atan2_dispatch_vector[dpctl_td_ns::num_types]; -static unary_impl_fn_ptr_t atanh_dispatch_vector[dpctl_td_ns::num_types]; static unary_impl_fn_ptr_t cbrt_dispatch_vector[dpctl_td_ns::num_types]; static unary_impl_fn_ptr_t ceil_dispatch_vector[dpctl_td_ns::num_types]; static unary_impl_fn_ptr_t conj_dispatch_vector[dpctl_td_ns::num_types]; @@ -116,91 +113,9 @@ PYBIND11_MODULE(_vm_impl, m) vm_ns::init_add(m); vm_ns::init_asin(m); vm_ns::init_asinh(m); - - // UnaryUfunc: ==== Atan(x) ==== - { - vm_ext::init_ufunc_dispatch_vector( - atan_dispatch_vector); - - auto atan_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { - return vm_ext::unary_ufunc(exec_q, src, dst, depends, - atan_dispatch_vector); - }; - m.def("_atan", atan_pyapi, - "Call `atan` function from OneMKL VM library to compute " - "inverse tangent of vector elements", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), - py::arg("depends") = py::list()); - - auto atan_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { - return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, - atan_dispatch_vector); - }; - m.def("_mkl_atan_to_call", atan_need_to_call_pyapi, - "Check input arguments to answer if `atan` function from " - "OneMKL VM library can be used", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); - } - - // BinaryUfunc: ==== Atan2(x1, x2) ==== - { - vm_ext::init_ufunc_dispatch_vector( - atan2_dispatch_vector); - - auto atan2_pyapi = [&](sycl::queue exec_q, arrayT src1, arrayT src2, - arrayT dst, const event_vecT &depends = {}) { - return vm_ext::binary_ufunc(exec_q, src1, src2, dst, depends, - atan2_dispatch_vector); - }; - m.def("_atan2", atan2_pyapi, - "Call `atan2` function from OneMKL VM library to compute element " - "by element inverse tangent of `x1/x2`", - py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("depends") = py::list()); - - auto atan2_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src1, - arrayT src2, arrayT dst) { - return vm_ext::need_to_call_binary_ufunc(exec_q, src1, src2, dst, - atan2_dispatch_vector); - }; - m.def("_mkl_atan2_to_call", atan2_need_to_call_pyapi, - "Check input arguments to answer if `atan2` function from " - "OneMKL VM library can be used", - py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), - py::arg("dst")); - } - - // UnaryUfunc: ==== Atanh(x) ==== - { - vm_ext::init_ufunc_dispatch_vector( - atanh_dispatch_vector); - - auto atanh_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { - return vm_ext::unary_ufunc(exec_q, src, dst, depends, - atanh_dispatch_vector); - }; - m.def("_atanh", atanh_pyapi, - "Call `atanh` function from OneMKL VM library to compute " - "inverse cosine of vector elements", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), - py::arg("depends") = py::list()); - - auto atanh_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { - return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, - atanh_dispatch_vector); - }; - 
m.def("_mkl_atanh_to_call", atanh_need_to_call_pyapi, - "Check input arguments to answer if `atanh` function from " - "OneMKL VM library can be used", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); - } + vm_ns::init_atan(m); + vm_ns::init_atan2(m); + vm_ns::init_atanh(m); // UnaryUfunc: ==== Cbrt(x) ==== { From ef195da411235dceeaffeec44f163d875fc11e0f Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 6 Jun 2024 18:55:45 +0200 Subject: [PATCH 13/35] Resolve issue with calling MKL function for undefined types --- dpnp/backend/extensions/vm/abs.cpp | 4 +- dpnp/backend/extensions/vm/acos.cpp | 4 +- dpnp/backend/extensions/vm/acosh.cpp | 4 +- dpnp/backend/extensions/vm/add.cpp | 1 + dpnp/backend/extensions/vm/asin.cpp | 4 +- dpnp/backend/extensions/vm/asinh.cpp | 4 +- dpnp/backend/extensions/vm/atan.cpp | 4 +- dpnp/backend/extensions/vm/atan2.cpp | 1 + dpnp/backend/extensions/vm/atanh.cpp | 4 +- dpnp/backend/extensions/vm/common.hpp | 198 ++++++++++++++++++++++++++ 10 files changed, 214 insertions(+), 14 deletions(-) diff --git a/dpnp/backend/extensions/vm/abs.cpp b/dpnp/backend/extensions/vm/abs.cpp index ac41e48989d9..4b0b081f0790 100644 --- a/dpnp/backend/extensions/vm/abs.cpp +++ b/dpnp/backend/extensions/vm/abs.cpp @@ -128,8 +128,8 @@ void init_abs(py::module_ m) auto abs_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst) { - return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, - contig_dispatch_vector); + return vm_ext::need_to_call_unary_ufunc( + exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; m.def("_mkl_abs_to_call", abs_need_to_call_pyapi, "Check input arguments to answer if `abs` function from " diff --git a/dpnp/backend/extensions/vm/acos.cpp b/dpnp/backend/extensions/vm/acos.cpp index 3a33e570e79f..f1c73ed000d7 100644 --- a/dpnp/backend/extensions/vm/acos.cpp +++ b/dpnp/backend/extensions/vm/acos.cpp @@ -128,8 +128,8 @@ void init_acos(py::module_ m) auto acos_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst) { - return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, - contig_dispatch_vector); + return vm_ext::need_to_call_unary_ufunc( + exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; m.def("_mkl_acos_to_call", acos_need_to_call_pyapi, "Check input arguments to answer if `acos` function from " diff --git a/dpnp/backend/extensions/vm/acosh.cpp b/dpnp/backend/extensions/vm/acosh.cpp index a3e1a01fe808..486ea5fdcdc3 100644 --- a/dpnp/backend/extensions/vm/acosh.cpp +++ b/dpnp/backend/extensions/vm/acosh.cpp @@ -128,8 +128,8 @@ void init_acosh(py::module_ m) auto acosh_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst) { - return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, - contig_dispatch_vector); + return vm_ext::need_to_call_unary_ufunc( + exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; m.def("_mkl_acosh_to_call", acosh_need_to_call_pyapi, "Check input arguments to answer if `acosh` function from " diff --git a/dpnp/backend/extensions/vm/add.cpp b/dpnp/backend/extensions/vm/add.cpp index 627c87f8afbc..856ed669e5d8 100644 --- a/dpnp/backend/extensions/vm/add.cpp +++ b/dpnp/backend/extensions/vm/add.cpp @@ -159,6 +159,7 @@ void init_add(py::module_ m) auto add_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src1, arrayT src2, arrayT dst) { return vm_ext::need_to_call_binary_ufunc(exec_q, src1, src2, dst, + output_typeid_vector, contig_dispatch_vector); }; m.def("_mkl_add_to_call", add_need_to_call_pyapi, diff --git 
a/dpnp/backend/extensions/vm/asin.cpp b/dpnp/backend/extensions/vm/asin.cpp index 8f2bfea867b4..d19677242cc4 100644 --- a/dpnp/backend/extensions/vm/asin.cpp +++ b/dpnp/backend/extensions/vm/asin.cpp @@ -128,8 +128,8 @@ void init_asin(py::module_ m) auto asin_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst) { - return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, - contig_dispatch_vector); + return vm_ext::need_to_call_unary_ufunc( + exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; m.def("_mkl_asin_to_call", asin_need_to_call_pyapi, "Check input arguments to answer if `asin` function from " diff --git a/dpnp/backend/extensions/vm/asinh.cpp b/dpnp/backend/extensions/vm/asinh.cpp index 2d72dde6ccce..c5b55d5a6380 100644 --- a/dpnp/backend/extensions/vm/asinh.cpp +++ b/dpnp/backend/extensions/vm/asinh.cpp @@ -128,8 +128,8 @@ void init_asinh(py::module_ m) auto asinh_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst) { - return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, - contig_dispatch_vector); + return vm_ext::need_to_call_unary_ufunc( + exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; m.def("_mkl_asinh_to_call", asinh_need_to_call_pyapi, "Check input arguments to answer if `asinh` function from " diff --git a/dpnp/backend/extensions/vm/atan.cpp b/dpnp/backend/extensions/vm/atan.cpp index 9ce1b59aa145..4cbbfa92e39e 100644 --- a/dpnp/backend/extensions/vm/atan.cpp +++ b/dpnp/backend/extensions/vm/atan.cpp @@ -128,8 +128,8 @@ void init_atan(py::module_ m) auto atan_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst) { - return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, - contig_dispatch_vector); + return vm_ext::need_to_call_unary_ufunc( + exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; m.def("_mkl_atan_to_call", atan_need_to_call_pyapi, "Check input arguments to answer if `atan` function from " diff --git a/dpnp/backend/extensions/vm/atan2.cpp b/dpnp/backend/extensions/vm/atan2.cpp index 593edf4a773a..ff58b81d52e6 100644 --- a/dpnp/backend/extensions/vm/atan2.cpp +++ b/dpnp/backend/extensions/vm/atan2.cpp @@ -148,6 +148,7 @@ void init_atan2(py::module_ m) auto atan2_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src1, arrayT src2, arrayT dst) { return vm_ext::need_to_call_binary_ufunc(exec_q, src1, src2, dst, + output_typeid_vector, contig_dispatch_vector); }; m.def("_mkl_atan2_to_call", atan2_need_to_call_pyapi, diff --git a/dpnp/backend/extensions/vm/atanh.cpp b/dpnp/backend/extensions/vm/atanh.cpp index d1d44a7a2f63..0278a31e1bf2 100644 --- a/dpnp/backend/extensions/vm/atanh.cpp +++ b/dpnp/backend/extensions/vm/atanh.cpp @@ -128,8 +128,8 @@ void init_atanh(py::module_ m) auto atanh_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst) { - return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, - contig_dispatch_vector); + return vm_ext::need_to_call_unary_ufunc( + exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; m.def("_mkl_atanh_to_call", atanh_need_to_call_pyapi, "Check input arguments to answer if `atanh` function from " diff --git a/dpnp/backend/extensions/vm/common.hpp b/dpnp/backend/extensions/vm/common.hpp index 570c79b15122..e2fcc9e2dde0 100644 --- a/dpnp/backend/extensions/vm/common.hpp +++ b/dpnp/backend/extensions/vm/common.hpp @@ -434,6 +434,204 @@ bool need_to_call_binary_ufunc(sycl::queue exec_q, return true; } +template +bool need_to_call_unary_ufunc(sycl::queue &exec_q, + dpctl::tensor::usm_ndarray 
&src, + dpctl::tensor::usm_ndarray &dst, + const output_typesT &output_type_vec, + const contig_dispatchT &contig_dispatch_vector) +{ + // check type_nums + int src_typenum = src.get_typenum(); + int dst_typenum = dst.get_typenum(); + + auto array_types = dpctl_td_ns::usm_ndarray_types(); + int src_typeid = array_types.typenum_to_lookup_id(src_typenum); + int dst_typeid = array_types.typenum_to_lookup_id(dst_typenum); + + // check that types are supported + int func_output_typeid = output_type_vec[src_typeid]; + if (dst_typeid != func_output_typeid) { + return false; + } + + // OneMKL VM functions perform a copy on host if no double type support + if (!exec_q.get_device().has(sycl::aspect::fp64)) { + return false; + } + + // check that queues are compatible + if (!dpctl::utils::queues_are_compatible(exec_q, {src, dst})) { + return false; + } + + // dimensions must be the same + int dst_nd = dst.get_ndim(); + if (dst_nd != src.get_ndim()) { + return false; + } + else if (dst_nd == 0) { + // don't call OneMKL for 0d arrays + return false; + } + + // shapes must be the same + const py::ssize_t *src_shape = src.get_shape_raw(); + const py::ssize_t *dst_shape = dst.get_shape_raw(); + bool shapes_equal(true); + size_t src_nelems(1); + + for (int i = 0; i < dst_nd; ++i) { + src_nelems *= static_cast(src_shape[i]); + shapes_equal = shapes_equal && (src_shape[i] == dst_shape[i]); + } + if (!shapes_equal) { + return false; + } + + // if nelems is zero, return false + if (src_nelems == 0) { + return false; + } + + // ensure that output is ample enough to accommodate all elements + auto dst_offsets = dst.get_minmax_offsets(); + // destination must be ample enough to accommodate all elements + { + size_t range = + static_cast(dst_offsets.second - dst_offsets.first); + if (range + 1 < src_nelems) { + return false; + } + } + + // check memory overlap + auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); + if (overlap(src, dst)) { + return false; + } + + // support only contiguous inputs + bool is_src_c_contig = src.is_c_contiguous(); + bool is_dst_c_contig = dst.is_c_contiguous(); + + bool all_c_contig = (is_src_c_contig && is_dst_c_contig); + if (!all_c_contig) { + return false; + } + + // MKL function is not defined for the type + if (contig_dispatch_vector[src_typeid] == nullptr) { + return false; + } + return true; +} + +template +bool need_to_call_binary_ufunc(sycl::queue &exec_q, + dpctl::tensor::usm_ndarray &src1, + dpctl::tensor::usm_ndarray &src2, + dpctl::tensor::usm_ndarray &dst, + const output_typesT &output_type_table, + const contig_dispatchT &contig_dispatch_table) +{ + // check type_nums + int src1_typenum = src1.get_typenum(); + int src2_typenum = src2.get_typenum(); + int dst_typenum = dst.get_typenum(); + + auto array_types = dpctl_td_ns::usm_ndarray_types(); + int src1_typeid = array_types.typenum_to_lookup_id(src1_typenum); + int src2_typeid = array_types.typenum_to_lookup_id(src2_typenum); + int dst_typeid = array_types.typenum_to_lookup_id(dst_typenum); + + // check that types are supported + int output_typeid = output_type_table[src1_typeid][src2_typeid]; + if (output_typeid != dst_typeid) { + return false; + } + + // types must be the same + if (src1_typeid != src2_typeid) { + return false; + } + + // OneMKL VM functions perform a copy on host if no double type support + if (!exec_q.get_device().has(sycl::aspect::fp64)) { + return false; + } + + // check that queues are compatible + if (!dpctl::utils::queues_are_compatible(exec_q, {src1, src2, dst})) { + return false; 
+ } + + // dimensions must be the same + int dst_nd = dst.get_ndim(); + if (dst_nd != src1.get_ndim() || dst_nd != src2.get_ndim()) { + return false; + } + else if (dst_nd == 0) { + // don't call OneMKL for 0d arrays + return false; + } + + // shapes must be the same + const py::ssize_t *src1_shape = src1.get_shape_raw(); + const py::ssize_t *src2_shape = src2.get_shape_raw(); + const py::ssize_t *dst_shape = dst.get_shape_raw(); + bool shapes_equal(true); + size_t src_nelems(1); + + for (int i = 0; i < dst_nd; ++i) { + src_nelems *= static_cast(src1_shape[i]); + shapes_equal = shapes_equal && (src1_shape[i] == dst_shape[i] && + src2_shape[i] == dst_shape[i]); + } + if (!shapes_equal) { + return false; + } + + // if nelems is zero, return false + if (src_nelems == 0) { + return false; + } + + // ensure that output is ample enough to accommodate all elements + auto dst_offsets = dst.get_minmax_offsets(); + // destination must be ample enough to accommodate all elements + { + size_t range = + static_cast(dst_offsets.second - dst_offsets.first); + if (range + 1 < src_nelems) { + return false; + } + } + + // check memory overlap + auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); + if (overlap(src1, dst) || overlap(src2, dst)) { + return false; + } + + // support only contiguous inputs + bool is_src1_c_contig = src1.is_c_contiguous(); + bool is_src2_c_contig = src2.is_c_contiguous(); + bool is_dst_c_contig = dst.is_c_contiguous(); + + bool all_c_contig = + (is_src1_c_contig && is_src2_c_contig && is_dst_c_contig); + if (!all_c_contig) { + return false; + } + + // MKL function is not defined for the type + if (contig_dispatch_table[src1_typeid] == nullptr) { + return false; + } + return true; +} + /** * @brief A macro used to define factories and a populating unary functions * to dispatch to a callback with proper OneMKL function within VM extension From a76d5e2ca5cbfe27a2cac0fc177dda6e23c13142 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 7 Jun 2024 13:42:32 +0200 Subject: [PATCH 14/35] Separated implementation of cbrt, ceil, conj, cos and cosh functions --- dpnp/backend/extensions/vm/CMakeLists.txt | 5 + dpnp/backend/extensions/vm/cbrt.cpp | 137 ++++++++++++++++++ dpnp/backend/extensions/vm/cbrt.hpp | 54 +------ dpnp/backend/extensions/vm/ceil.cpp | 137 ++++++++++++++++++ dpnp/backend/extensions/vm/ceil.hpp | 54 +------ dpnp/backend/extensions/vm/conj.cpp | 137 ++++++++++++++++++ dpnp/backend/extensions/vm/conj.hpp | 54 +------ dpnp/backend/extensions/vm/cos.cpp | 139 ++++++++++++++++++ dpnp/backend/extensions/vm/cos.hpp | 54 +------ dpnp/backend/extensions/vm/cosh.cpp | 139 ++++++++++++++++++ dpnp/backend/extensions/vm/cosh.hpp | 54 +------ dpnp/backend/extensions/vm/types_matrix.hpp | 96 ------------- dpnp/backend/extensions/vm/vm_py.cpp | 150 +------------------- 13 files changed, 724 insertions(+), 486 deletions(-) create mode 100644 dpnp/backend/extensions/vm/cbrt.cpp create mode 100644 dpnp/backend/extensions/vm/ceil.cpp create mode 100644 dpnp/backend/extensions/vm/conj.cpp create mode 100644 dpnp/backend/extensions/vm/cos.cpp create mode 100644 dpnp/backend/extensions/vm/cosh.cpp diff --git a/dpnp/backend/extensions/vm/CMakeLists.txt b/dpnp/backend/extensions/vm/CMakeLists.txt index 02b6f9125e4e..cca434d08127 100644 --- a/dpnp/backend/extensions/vm/CMakeLists.txt +++ b/dpnp/backend/extensions/vm/CMakeLists.txt @@ -33,6 +33,11 @@ set(_elementwise_sources ${CMAKE_CURRENT_SOURCE_DIR}/atan.cpp ${CMAKE_CURRENT_SOURCE_DIR}/atan2.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/atanh.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/cbrt.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/ceil.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/conj.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/cos.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/cosh.cpp ) set(_module_src diff --git a/dpnp/backend/extensions/vm/cbrt.cpp b/dpnp/backend/extensions/vm/cbrt.cpp new file mode 100644 index 000000000000..4e662caa1746 --- /dev/null +++ b/dpnp/backend/extensions/vm/cbrt.cpp @@ -0,0 +1,137 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include +#include + +#include "dpctl4pybind11.hpp" + +#include "cbrt.hpp" +#include "common.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::vm +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; +namespace vm_ext = dpnp::backend::ext::vm; + +namespace impl +{ +// OneMKL namespace with VM functions +namespace mkl_vm = oneapi::mkl::vm; + +/** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::cbrt function. + * + * @tparam T Type of input vector `a` and of result vector `y`. 
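+ *
+ * Note: only real floating-point types are listed, so complex inputs resolve
+ * to `void` and are not dispatched to the MKL VM implementation of cbrt.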
+ */ +template +struct OutputType +{ + using value_type = + typename std::disjunction, + td_ns::TypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +static sycl::event cbrt_contig_impl(sycl::queue &exec_q, + std::size_t in_n, + const char *in_a, + char *out_y, + const std::vector &depends) +{ + tu_ns::validate_type_for_device(exec_q); + + std::int64_t n = static_cast(in_n); + const T *a = reinterpret_cast(in_a); + + using resTy = typename OutputType::value_type; + resTy *y = reinterpret_cast(out_y); + + return mkl_vm::cbrt(exec_q, + n, // number of elements to be calculated + a, // pointer `a` containing input vector of size n + y, // pointer `y` to the output vector of size n + depends); +} + +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +static int output_typeid_vector[td_ns::num_types]; +static unary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types]; + +MACRO_POPULATE_DISPATCH_VECTORS(cbrt); +} // namespace impl + +void init_cbrt(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + impl::populate_dispatch_vectors(); + using impl::contig_dispatch_vector; + using impl::output_typeid_vector; + + auto cbrt_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, + const event_vecT &depends = {}) { + return py_int::py_unary_ufunc( + src, dst, exec_q, depends, output_typeid_vector, + contig_dispatch_vector, + // no support of strided implementation in OneMKL + td_ns::NullPtrVector{}); + }; + m.def("_cbrt", cbrt_pyapi, + "Call `cbrt` function from OneMKL VM library to compute " + "the element-wise cube root of vector elements", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), + py::arg("depends") = py::list()); + + auto cbrt_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, + arrayT dst) { + return vm_ext::need_to_call_unary_ufunc( + exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); + }; + m.def("_mkl_cbrt_to_call", cbrt_need_to_call_pyapi, + "Check input arguments to answer if `cbrt` function from " + "OneMKL VM library can be used", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); +} +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/cbrt.hpp b/dpnp/backend/extensions/vm/cbrt.hpp index 5c0a0adc53e0..d4eb052a65b6 100644 --- a/dpnp/backend/extensions/vm/cbrt.hpp +++ b/dpnp/backend/extensions/vm/cbrt.hpp @@ -25,55 +25,11 @@ #pragma once -#include +#include -#include "common.hpp" -#include "types_matrix.hpp" +namespace py = pybind11; -namespace dpnp +namespace dpnp::extensions::vm { -namespace backend -{ -namespace ext -{ -namespace vm -{ -template -sycl::event cbrt_contig_impl(sycl::queue exec_q, - const std::int64_t n, - const char *in_a, - char *out_y, - const std::vector &depends) -{ - type_utils::validate_type_for_device(exec_q); - - const T *a = reinterpret_cast(in_a); - using resTy = typename types::CbrtOutputType::value_type; - resTy *y = reinterpret_cast(out_y); - - return mkl_vm::cbrt(exec_q, - n, // number of elements to be calculated - a, // pointer `a` containing input vector of size n - y, // pointer `y` to the output vector of size n - depends); -} - -template -struct CbrtContigFactory -{ - fnT get() - { - if constexpr (std::is_same_v< - typename types::CbrtOutputType::value_type, void>) - { - return nullptr; - } - else { - return cbrt_contig_impl; - } - } -}; -} // namespace vm -} // namespace ext -} // namespace backend -} // namespace dpnp +void init_cbrt(py::module_ m); +} // 
namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/ceil.cpp b/dpnp/backend/extensions/vm/ceil.cpp new file mode 100644 index 000000000000..3feb4b8ae352 --- /dev/null +++ b/dpnp/backend/extensions/vm/ceil.cpp @@ -0,0 +1,137 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include +#include + +#include "dpctl4pybind11.hpp" + +#include "ceil.hpp" +#include "common.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::vm +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; +namespace vm_ext = dpnp::backend::ext::vm; + +namespace impl +{ +// OneMKL namespace with VM functions +namespace mkl_vm = oneapi::mkl::vm; + +/** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::ceil function. + * + * @tparam T Type of input vector `a` and of result vector `y`. 
+ */ +template +struct OutputType +{ + using value_type = + typename std::disjunction, + td_ns::TypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +static sycl::event ceil_contig_impl(sycl::queue &exec_q, + std::size_t in_n, + const char *in_a, + char *out_y, + const std::vector &depends) +{ + tu_ns::validate_type_for_device(exec_q); + + std::int64_t n = static_cast(in_n); + const T *a = reinterpret_cast(in_a); + + using resTy = typename OutputType::value_type; + resTy *y = reinterpret_cast(out_y); + + return mkl_vm::ceil(exec_q, + n, // number of elements to be calculated + a, // pointer `a` containing input vector of size n + y, // pointer `y` to the output vector of size n + depends); +} + +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +static int output_typeid_vector[td_ns::num_types]; +static unary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types]; + +MACRO_POPULATE_DISPATCH_VECTORS(ceil); +} // namespace impl + +void init_ceil(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + impl::populate_dispatch_vectors(); + using impl::contig_dispatch_vector; + using impl::output_typeid_vector; + + auto ceil_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, + const event_vecT &depends = {}) { + return py_int::py_unary_ufunc( + src, dst, exec_q, depends, output_typeid_vector, + contig_dispatch_vector, + // no support of strided implementation in OneMKL + td_ns::NullPtrVector{}); + }; + m.def("_ceil", ceil_pyapi, + "Call `ceil` function from OneMKL VM library to compute " + "ceiling of vector elements", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), + py::arg("depends") = py::list()); + + auto ceil_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, + arrayT dst) { + return vm_ext::need_to_call_unary_ufunc( + exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); + }; + m.def("_mkl_ceil_to_call", ceil_need_to_call_pyapi, + "Check input arguments to answer if `ceil` function from " + "OneMKL VM library can be used", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); +} +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/ceil.hpp b/dpnp/backend/extensions/vm/ceil.hpp index fd4f3a8680ce..dd9006d1b184 100644 --- a/dpnp/backend/extensions/vm/ceil.hpp +++ b/dpnp/backend/extensions/vm/ceil.hpp @@ -25,55 +25,11 @@ #pragma once -#include +#include -#include "common.hpp" -#include "types_matrix.hpp" +namespace py = pybind11; -namespace dpnp +namespace dpnp::extensions::vm { -namespace backend -{ -namespace ext -{ -namespace vm -{ -template -sycl::event ceil_contig_impl(sycl::queue exec_q, - const std::int64_t n, - const char *in_a, - char *out_y, - const std::vector &depends) -{ - type_utils::validate_type_for_device(exec_q); - - const T *a = reinterpret_cast(in_a); - using resTy = typename types::CeilOutputType::value_type; - resTy *y = reinterpret_cast(out_y); - - return mkl_vm::ceil(exec_q, - n, // number of elements to be calculated - a, // pointer `a` containing input vector of size n - y, // pointer `y` to the output vector of size n - depends); -} - -template -struct CeilContigFactory -{ - fnT get() - { - if constexpr (std::is_same_v< - typename types::CeilOutputType::value_type, void>) - { - return nullptr; - } - else { - return ceil_contig_impl; - } - } -}; -} // namespace vm -} // namespace ext -} // namespace backend -} // namespace dpnp +void init_ceil(py::module_ m); +} // namespace 
dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/conj.cpp b/dpnp/backend/extensions/vm/conj.cpp new file mode 100644 index 000000000000..81ae93834ca6 --- /dev/null +++ b/dpnp/backend/extensions/vm/conj.cpp @@ -0,0 +1,137 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include +#include + +#include "dpctl4pybind11.hpp" + +#include "common.hpp" +#include "conj.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::vm +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; +namespace vm_ext = dpnp::backend::ext::vm; + +namespace impl +{ +// OneMKL namespace with VM functions +namespace mkl_vm = oneapi::mkl::vm; + +/** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::conj function. + * + * @tparam T Type of input vector `a` and of result vector `y`. 
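+ *
+ * Note: only complex types are listed, so real-valued inputs resolve to
+ * `void` and are not dispatched to the MKL VM implementation of conj.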
+ */ +template +struct OutputType +{ + using value_type = typename std::disjunction< + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry>, + td_ns::DefaultResultEntry>::result_type; +}; + +template +static sycl::event conj_contig_impl(sycl::queue &exec_q, + std::size_t in_n, + const char *in_a, + char *out_y, + const std::vector &depends) +{ + tu_ns::validate_type_for_device(exec_q); + + std::int64_t n = static_cast(in_n); + const T *a = reinterpret_cast(in_a); + + using resTy = typename OutputType::value_type; + resTy *y = reinterpret_cast(out_y); + + return mkl_vm::conj(exec_q, + n, // number of elements to be calculated + a, // pointer `a` containing input vector of size n + y, // pointer `y` to the output vector of size n + depends); +} + +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +static int output_typeid_vector[td_ns::num_types]; +static unary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types]; + +MACRO_POPULATE_DISPATCH_VECTORS(conj); +} // namespace impl + +void init_conj(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + impl::populate_dispatch_vectors(); + using impl::contig_dispatch_vector; + using impl::output_typeid_vector; + + auto conj_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, + const event_vecT &depends = {}) { + return py_int::py_unary_ufunc( + src, dst, exec_q, depends, output_typeid_vector, + contig_dispatch_vector, + // no support of strided implementation in OneMKL + td_ns::NullPtrVector{}); + }; + m.def("_conj", conj_pyapi, + "Call `conj` function from OneMKL VM library to compute " + "conjugate of vector elements", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), + py::arg("depends") = py::list()); + + auto conj_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, + arrayT dst) { + return vm_ext::need_to_call_unary_ufunc( + exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); + }; + m.def("_mkl_conj_to_call", conj_need_to_call_pyapi, + "Check input arguments to answer if `conj` function from " + "OneMKL VM library can be used", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); +} +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/conj.hpp b/dpnp/backend/extensions/vm/conj.hpp index af3acb3466ea..0ce61082ab6f 100644 --- a/dpnp/backend/extensions/vm/conj.hpp +++ b/dpnp/backend/extensions/vm/conj.hpp @@ -25,55 +25,11 @@ #pragma once -#include +#include -#include "common.hpp" -#include "types_matrix.hpp" +namespace py = pybind11; -namespace dpnp +namespace dpnp::extensions::vm { -namespace backend -{ -namespace ext -{ -namespace vm -{ -template -sycl::event conj_contig_impl(sycl::queue exec_q, - const std::int64_t n, - const char *in_a, - char *out_y, - const std::vector &depends) -{ - type_utils::validate_type_for_device(exec_q); - - const T *a = reinterpret_cast(in_a); - using resTy = typename types::ConjOutputType::value_type; - resTy *y = reinterpret_cast(out_y); - - return mkl_vm::conj(exec_q, - n, // number of elements to be calculated - a, // pointer `a` containing input vector of size n - y, // pointer `y` to the output vector of size n - depends); -} - -template -struct ConjContigFactory -{ - fnT get() - { - if constexpr (std::is_same_v< - typename types::ConjOutputType::value_type, void>) - { - return nullptr; - } - else { - return conj_contig_impl; - } - } -}; -} // namespace vm -} // namespace ext -} // namespace backend -} // namespace dpnp +void init_conj(py::module_ 
m); +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/cos.cpp b/dpnp/backend/extensions/vm/cos.cpp new file mode 100644 index 000000000000..bf2f564414bd --- /dev/null +++ b/dpnp/backend/extensions/vm/cos.cpp @@ -0,0 +1,139 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include +#include + +#include "dpctl4pybind11.hpp" + +#include "common.hpp" +#include "cos.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::vm +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; +namespace vm_ext = dpnp::backend::ext::vm; + +namespace impl +{ +// OneMKL namespace with VM functions +namespace mkl_vm = oneapi::mkl::vm; + +/** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::cos function. + * + * @tparam T Type of input vector `a` and of result vector `y`. 
+ */ +template +struct OutputType +{ + using value_type = typename std::disjunction< + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +static sycl::event cos_contig_impl(sycl::queue &exec_q, + std::size_t in_n, + const char *in_a, + char *out_y, + const std::vector &depends) +{ + tu_ns::validate_type_for_device(exec_q); + + std::int64_t n = static_cast(in_n); + const T *a = reinterpret_cast(in_a); + + using resTy = typename OutputType::value_type; + resTy *y = reinterpret_cast(out_y); + + return mkl_vm::cos(exec_q, + n, // number of elements to be calculated + a, // pointer `a` containing input vector of size n + y, // pointer `y` to the output vector of size n + depends); +} + +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +static int output_typeid_vector[td_ns::num_types]; +static unary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types]; + +MACRO_POPULATE_DISPATCH_VECTORS(cos); +} // namespace impl + +void init_cos(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + impl::populate_dispatch_vectors(); + using impl::contig_dispatch_vector; + using impl::output_typeid_vector; + + auto cos_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, + const event_vecT &depends = {}) { + return py_int::py_unary_ufunc( + src, dst, exec_q, depends, output_typeid_vector, + contig_dispatch_vector, + // no support of strided implementation in OneMKL + td_ns::NullPtrVector{}); + }; + m.def("_cos", cos_pyapi, + "Call `cos` function from OneMKL VM library to compute " + "cosine of vector elements", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), + py::arg("depends") = py::list()); + + auto cos_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, + arrayT dst) { + return vm_ext::need_to_call_unary_ufunc( + exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); + }; + m.def("_mkl_cos_to_call", cos_need_to_call_pyapi, + "Check input arguments to answer if `cos` function from " + "OneMKL VM library can be used", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); +} +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/cos.hpp b/dpnp/backend/extensions/vm/cos.hpp index a085123ca143..59c92ad0fd8f 100644 --- a/dpnp/backend/extensions/vm/cos.hpp +++ b/dpnp/backend/extensions/vm/cos.hpp @@ -25,55 +25,11 @@ #pragma once -#include +#include -#include "common.hpp" -#include "types_matrix.hpp" +namespace py = pybind11; -namespace dpnp +namespace dpnp::extensions::vm { -namespace backend -{ -namespace ext -{ -namespace vm -{ -template -sycl::event cos_contig_impl(sycl::queue exec_q, - const std::int64_t n, - const char *in_a, - char *out_y, - const std::vector &depends) -{ - type_utils::validate_type_for_device(exec_q); - - const T *a = reinterpret_cast(in_a); - using resTy = typename types::CosOutputType::value_type; - resTy *y = reinterpret_cast(out_y); - - return mkl_vm::cos(exec_q, - n, // number of elements to be calculated - a, // pointer `a` containing input vector of size n - y, // pointer `y` to the output vector of size n - depends); -} - -template -struct CosContigFactory -{ - fnT get() - { - if constexpr (std::is_same_v< - typename types::CosOutputType::value_type, void>) - { - return nullptr; - } - else { - return cos_contig_impl; - } - } -}; -} // namespace vm -} // namespace ext -} // namespace backend -} // namespace 
dpnp +void init_cos(py::module_ m); +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/cosh.cpp b/dpnp/backend/extensions/vm/cosh.cpp new file mode 100644 index 000000000000..61563a560dda --- /dev/null +++ b/dpnp/backend/extensions/vm/cosh.cpp @@ -0,0 +1,139 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include +#include + +#include "dpctl4pybind11.hpp" + +#include "common.hpp" +#include "cosh.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::vm +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; +namespace vm_ext = dpnp::backend::ext::vm; + +namespace impl +{ +// OneMKL namespace with VM functions +namespace mkl_vm = oneapi::mkl::vm; + +/** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::cosh function. + * + * @tparam T Type of input vector `a` and of result vector `y`. 
+ */ +template +struct OutputType +{ + using value_type = typename std::disjunction< + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +static sycl::event cosh_contig_impl(sycl::queue &exec_q, + std::size_t in_n, + const char *in_a, + char *out_y, + const std::vector &depends) +{ + tu_ns::validate_type_for_device(exec_q); + + std::int64_t n = static_cast(in_n); + const T *a = reinterpret_cast(in_a); + + using resTy = typename OutputType::value_type; + resTy *y = reinterpret_cast(out_y); + + return mkl_vm::cosh(exec_q, + n, // number of elements to be calculated + a, // pointer `a` containing input vector of size n + y, // pointer `y` to the output vector of size n + depends); +} + +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +static int output_typeid_vector[td_ns::num_types]; +static unary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types]; + +MACRO_POPULATE_DISPATCH_VECTORS(cosh); +} // namespace impl + +void init_cosh(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + impl::populate_dispatch_vectors(); + using impl::contig_dispatch_vector; + using impl::output_typeid_vector; + + auto cosh_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, + const event_vecT &depends = {}) { + return py_int::py_unary_ufunc( + src, dst, exec_q, depends, output_typeid_vector, + contig_dispatch_vector, + // no support of strided implementation in OneMKL + td_ns::NullPtrVector{}); + }; + m.def("_cosh", cosh_pyapi, + "Call `cosh` function from OneMKL VM library to compute " + "the hyperbolic cosine of vector elements", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), + py::arg("depends") = py::list()); + + auto cosh_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, + arrayT dst) { + return vm_ext::need_to_call_unary_ufunc( + exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); + }; + m.def("_mkl_cosh_to_call", cosh_need_to_call_pyapi, + "Check input arguments to answer if `cosh` function from " + "OneMKL VM library can be used", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); +} +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/cosh.hpp b/dpnp/backend/extensions/vm/cosh.hpp index 301a2fbeb22c..030ef945823b 100644 --- a/dpnp/backend/extensions/vm/cosh.hpp +++ b/dpnp/backend/extensions/vm/cosh.hpp @@ -25,55 +25,11 @@ #pragma once -#include +#include -#include "common.hpp" -#include "types_matrix.hpp" +namespace py = pybind11; -namespace dpnp +namespace dpnp::extensions::vm { -namespace backend -{ -namespace ext -{ -namespace vm -{ -template -sycl::event cosh_contig_impl(sycl::queue exec_q, - const std::int64_t n, - const char *in_a, - char *out_y, - const std::vector &depends) -{ - type_utils::validate_type_for_device(exec_q); - - const T *a = reinterpret_cast(in_a); - using resTy = typename types::CoshOutputType::value_type; - resTy *y = reinterpret_cast(out_y); - - return mkl_vm::cosh(exec_q, - n, // number of elements to be calculated - a, // pointer `a` containing input vector of size n - y, // pointer `y` to the output vector of size n - depends); -} - -template -struct CoshContigFactory -{ - fnT get() - { - if constexpr (std::is_same_v< - typename types::CoshOutputType::value_type, void>) - { - return nullptr; - } - else { - return cosh_contig_impl; - } - } -}; -} // namespace vm -} // namespace ext -} // 
namespace backend -} // namespace dpnp +void init_cosh(py::module_ m); +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/types_matrix.hpp b/dpnp/backend/extensions/vm/types_matrix.hpp index 940c68565e7a..e8c9d0d574b2 100644 --- a/dpnp/backend/extensions/vm/types_matrix.hpp +++ b/dpnp/backend/extensions/vm/types_matrix.hpp @@ -43,102 +43,6 @@ namespace vm { namespace types { -/** - * @brief A factory to define pairs of supported types for which - * MKL VM library provides support in oneapi::mkl::vm::atanh function. - * - * @tparam T Type of input vector `a` and of result vector `y`. - */ -template -struct AtanhOutputType -{ - using value_type = typename std::disjunction< - dpctl_td_ns::TypeMapResultEntry>, - dpctl_td_ns::TypeMapResultEntry>, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::DefaultResultEntry>::result_type; -}; - -/** - * @brief A factory to define pairs of supported types for which - * MKL VM library provides support in oneapi::mkl::vm::cbrt function. - * - * @tparam T Type of input vector `a` and of result vector `y`. - */ -template -struct CbrtOutputType -{ - using value_type = typename std::disjunction< - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::DefaultResultEntry>::result_type; -}; - -/** - * @brief A factory to define pairs of supported types for which - * MKL VM library provides support in oneapi::mkl::vm::ceil function. - * - * @tparam T Type of input vector `a` and of result vector `y`. - */ -template -struct CeilOutputType -{ - using value_type = typename std::disjunction< - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::DefaultResultEntry>::result_type; -}; - -/** - * @brief A factory to define pairs of supported types for which - * MKL VM library provides support in oneapi::mkl::vm::conj function. - * - * @tparam T Type of input vector `a` and of result vector `y`. - */ -template -struct ConjOutputType -{ - using value_type = typename std::disjunction< - dpctl_td_ns::TypeMapResultEntry>, - dpctl_td_ns::TypeMapResultEntry>, - dpctl_td_ns::DefaultResultEntry>::result_type; -}; - -/** - * @brief A factory to define pairs of supported types for which - * MKL VM library provides support in oneapi::mkl::vm::cos function. - * - * @tparam T Type of input vector `a` and of result vector `y`. - */ -template -struct CosOutputType -{ - using value_type = typename std::disjunction< - dpctl_td_ns::TypeMapResultEntry>, - dpctl_td_ns::TypeMapResultEntry>, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::DefaultResultEntry>::result_type; -}; - -/** - * @brief A factory to define pairs of supported types for which - * MKL VM library provides support in oneapi::mkl::vm::cosh function. - * - * @tparam T Type of input vector `a` and of result vector `y`. - */ -template -struct CoshOutputType -{ - using value_type = typename std::disjunction< - dpctl_td_ns::TypeMapResultEntry>, - dpctl_td_ns::TypeMapResultEntry>, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::DefaultResultEntry>::result_type; -}; - /** * @brief A factory to define pairs of supported types for which * MKL VM library provides support in oneapi::mkl::vm::div function. 
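The new per-function translation units above all follow the same registration pattern: init_conj, init_cos and init_cosh populate their dispatch data once and then expose a pair of Python bindings, `_<name>` (runs the OneMKL VM kernel) and `_mkl_<name>_to_call` (reports whether the OneMKL implementation is applicable to the given queue and arrays). The short Python sketch below is not part of this patch; it only illustrates how such a pair is typically driven, and the import path of the built `_vm_impl` module as well as the dpctl calls are assumptions rather than anything introduced by these changes.

    # Hypothetical usage sketch only (not part of this patch): drive the
    # `_cos` / `_mkl_cos_to_call` pair registered by init_cos(m) above.
    import dpctl
    import dpctl.tensor as dpt
    import dpnp.backend.extensions.vm._vm_impl as vmi  # assumed import path

    q = dpctl.SyclQueue()
    x = dpt.linspace(0, 1, num=1024, dtype="f4", sycl_queue=q)
    y = dpt.empty_like(x)

    # The predicate is checked first; when it returns False (unsupported
    # dtype, non-contiguous data, incompatible queue) the caller is expected
    # to fall back to a generic elementwise implementation.
    if vmi._mkl_cos_to_call(q, x, y):
        ht_ev, _ = vmi._cos(q, x, y, depends=[])  # returns (host_task event, compute event)
        ht_ev.wait()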
diff --git a/dpnp/backend/extensions/vm/vm_py.cpp b/dpnp/backend/extensions/vm/vm_py.cpp index 9b80a7bea46b..fdd972b61d3b 100644 --- a/dpnp/backend/extensions/vm/vm_py.cpp +++ b/dpnp/backend/extensions/vm/vm_py.cpp @@ -75,11 +75,6 @@ namespace vm_ns = dpnp::extensions::vm; using vm_ext::binary_impl_fn_ptr_t; using vm_ext::unary_impl_fn_ptr_t; -static unary_impl_fn_ptr_t cbrt_dispatch_vector[dpctl_td_ns::num_types]; -static unary_impl_fn_ptr_t ceil_dispatch_vector[dpctl_td_ns::num_types]; -static unary_impl_fn_ptr_t conj_dispatch_vector[dpctl_td_ns::num_types]; -static unary_impl_fn_ptr_t cos_dispatch_vector[dpctl_td_ns::num_types]; -static unary_impl_fn_ptr_t cosh_dispatch_vector[dpctl_td_ns::num_types]; static binary_impl_fn_ptr_t div_dispatch_vector[dpctl_td_ns::num_types]; static unary_impl_fn_ptr_t exp_dispatch_vector[dpctl_td_ns::num_types]; static unary_impl_fn_ptr_t exp2_dispatch_vector[dpctl_td_ns::num_types]; @@ -116,146 +111,11 @@ PYBIND11_MODULE(_vm_impl, m) vm_ns::init_atan(m); vm_ns::init_atan2(m); vm_ns::init_atanh(m); - - // UnaryUfunc: ==== Cbrt(x) ==== - { - vm_ext::init_ufunc_dispatch_vector( - cbrt_dispatch_vector); - - auto cbrt_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { - return vm_ext::unary_ufunc(exec_q, src, dst, depends, - cbrt_dispatch_vector); - }; - m.def("_cbrt", cbrt_pyapi, - "Call `cbrt` function from OneMKL VM library to compute " - "the element-wise cube root of vector elements", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), - py::arg("depends") = py::list()); - - auto cbrt_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { - return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, - cbrt_dispatch_vector); - }; - m.def("_mkl_cbrt_to_call", cbrt_need_to_call_pyapi, - "Check input arguments to answer if `cbrt` function from " - "OneMKL VM library can be used", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); - } - - // UnaryUfunc: ==== Ceil(x) ==== - { - vm_ext::init_ufunc_dispatch_vector( - ceil_dispatch_vector); - - auto ceil_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { - return vm_ext::unary_ufunc(exec_q, src, dst, depends, - ceil_dispatch_vector); - }; - m.def("_ceil", ceil_pyapi, - "Call `ceil` function from OneMKL VM library to compute " - "ceiling of vector elements", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), - py::arg("depends") = py::list()); - - auto ceil_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { - return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, - ceil_dispatch_vector); - }; - m.def("_mkl_ceil_to_call", ceil_need_to_call_pyapi, - "Check input arguments to answer if `ceil` function from " - "OneMKL VM library can be used", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); - } - - // UnaryUfunc: ==== Conj(x) ==== - { - vm_ext::init_ufunc_dispatch_vector( - conj_dispatch_vector); - - auto conj_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { - return vm_ext::unary_ufunc(exec_q, src, dst, depends, - conj_dispatch_vector); - }; - m.def("_conj", conj_pyapi, - "Call `conj` function from OneMKL VM library to compute " - "conjugate of vector elements", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), - py::arg("depends") = py::list()); - - auto conj_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { - return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, - conj_dispatch_vector); - }; 
- m.def("_mkl_conj_to_call", conj_need_to_call_pyapi, - "Check input arguments to answer if `conj` function from " - "OneMKL VM library can be used", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); - } - - // UnaryUfunc: ==== Cos(x) ==== - { - vm_ext::init_ufunc_dispatch_vector( - cos_dispatch_vector); - - auto cos_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { - return vm_ext::unary_ufunc(exec_q, src, dst, depends, - cos_dispatch_vector); - }; - m.def("_cos", cos_pyapi, - "Call `cos` function from OneMKL VM library to compute " - "cosine of vector elements", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), - py::arg("depends") = py::list()); - - auto cos_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { - return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, - cos_dispatch_vector); - }; - m.def("_mkl_cos_to_call", cos_need_to_call_pyapi, - "Check input arguments to answer if `cos` function from " - "OneMKL VM library can be used", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); - } - - // UnaryUfunc: ==== Cosh(x) ==== - { - vm_ext::init_ufunc_dispatch_vector( - cosh_dispatch_vector); - - auto cosh_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { - return vm_ext::unary_ufunc(exec_q, src, dst, depends, - cosh_dispatch_vector); - }; - m.def("_cosh", cosh_pyapi, - "Call `cosh` function from OneMKL VM library to compute " - "inverse cosine of vector elements", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), - py::arg("depends") = py::list()); - - auto cosh_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { - return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, - cosh_dispatch_vector); - }; - m.def("_mkl_cosh_to_call", cosh_need_to_call_pyapi, - "Check input arguments to answer if `cosh` function from " - "OneMKL VM library can be used", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); - } + vm_ns::init_cbrt(m); + vm_ns::init_ceil(m); + vm_ns::init_conj(m); + vm_ns::init_cos(m); + vm_ns::init_cosh(m); // BinaryUfunc: ==== Div(x1, x2) ==== { From 31b916f48c7a53bdae2953dae012e7378ceae2a9 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 7 Jun 2024 14:54:32 +0200 Subject: [PATCH 15/35] Separated implementation of div, exp, exp2, expm1, floor and hypot functions --- dpnp/backend/extensions/vm/CMakeLists.txt | 6 + dpnp/backend/extensions/vm/div.cpp | 171 ++++++++++++++++++ dpnp/backend/extensions/vm/div.hpp | 57 +----- dpnp/backend/extensions/vm/exp.cpp | 139 +++++++++++++++ dpnp/backend/extensions/vm/exp.hpp | 54 +----- dpnp/backend/extensions/vm/exp2.cpp | 137 +++++++++++++++ dpnp/backend/extensions/vm/exp2.hpp | 54 +----- dpnp/backend/extensions/vm/expm1.cpp | 137 +++++++++++++++ dpnp/backend/extensions/vm/expm1.hpp | 54 +----- dpnp/backend/extensions/vm/floor.cpp | 137 +++++++++++++++ dpnp/backend/extensions/vm/floor.hpp | 54 +----- dpnp/backend/extensions/vm/hypot.cpp | 160 +++++++++++++++++ dpnp/backend/extensions/vm/hypot.hpp | 57 +----- dpnp/backend/extensions/vm/types_matrix.hpp | 102 ----------- dpnp/backend/extensions/vm/vm_py.cpp | 183 +------------------- 15 files changed, 923 insertions(+), 579 deletions(-) create mode 100644 dpnp/backend/extensions/vm/div.cpp create mode 100644 dpnp/backend/extensions/vm/exp.cpp create mode 100644 dpnp/backend/extensions/vm/exp2.cpp create mode 100644 dpnp/backend/extensions/vm/expm1.cpp create mode 100644 dpnp/backend/extensions/vm/floor.cpp create mode 100644 
dpnp/backend/extensions/vm/hypot.cpp diff --git a/dpnp/backend/extensions/vm/CMakeLists.txt b/dpnp/backend/extensions/vm/CMakeLists.txt index cca434d08127..9de3c7908088 100644 --- a/dpnp/backend/extensions/vm/CMakeLists.txt +++ b/dpnp/backend/extensions/vm/CMakeLists.txt @@ -38,6 +38,12 @@ set(_elementwise_sources ${CMAKE_CURRENT_SOURCE_DIR}/conj.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cos.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cosh.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/div.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/exp.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/exp2.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/expm1.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/floor.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/hypot.cpp ) set(_module_src diff --git a/dpnp/backend/extensions/vm/div.cpp b/dpnp/backend/extensions/vm/div.cpp new file mode 100644 index 000000000000..eb6b6519ed93 --- /dev/null +++ b/dpnp/backend/extensions/vm/div.cpp @@ -0,0 +1,171 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include +#include + +#include "dpctl4pybind11.hpp" + +#include "common.hpp" +#include "div.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::vm +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; +namespace vm_ext = dpnp::backend::ext::vm; + +namespace impl +{ +// OneMKL namespace with VM functions +namespace mkl_vm = oneapi::mkl::vm; + +/** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::div function. 
+ * + * @tparam T Type of input vectors `a` and `b` and of result vector `y`. + */ +template +struct OutputType +{ + using value_type = typename std::disjunction< + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + std::complex>, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + std::complex>, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +static sycl::event div_contig_impl(sycl::queue &exec_q, + std::size_t in_n, + const char *in_a, + ssize_t a_offset, + const char *in_b, + ssize_t b_offset, + char *out_y, + ssize_t out_offset, + const std::vector &depends) +{ + tu_ns::validate_type_for_device(exec_q); + tu_ns::validate_type_for_device(exec_q); + + if ((a_offset != 0) || (b_offset != 0) || (out_offset != 0)) { + throw std::runtime_error("Array offsets have to be equal to 0"); + } + + std::int64_t n = static_cast(in_n); + const T1 *a = reinterpret_cast(in_a); + const T2 *b = reinterpret_cast(in_b); + + using resTy = typename OutputType::value_type; + resTy *y = reinterpret_cast(out_y); + + return mkl_vm::div(exec_q, + n, // number of elements to be calculated + a, // pointer `a` containing 1st input vector of size n + b, // pointer `b` containing 2nd input vector of size n + y, // pointer `y` to the output vector of size n + depends); +} + +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +static int output_typeid_vector[td_ns::num_types][td_ns::num_types]; +static binary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types] + [td_ns::num_types]; + +MACRO_POPULATE_DISPATCH_TABLES(div); +} // namespace impl + +void init_div(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + impl::populate_dispatch_tables(); + using impl::contig_dispatch_vector; + using impl::output_typeid_vector; + + auto div_pyapi = [&](sycl::queue exec_q, arrayT src1, arrayT src2, + arrayT dst, const event_vecT &depends = {}) { + return py_int::py_binary_ufunc( + src1, src2, dst, exec_q, depends, output_typeid_vector, + contig_dispatch_vector, + // no support of strided implementation in OneMKL + td_ns::NullPtrTable{}, + // no support of C-contig row with broadcasting in OneMKL + td_ns::NullPtrTable< + impl:: + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + td_ns::NullPtrTable< + impl:: + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_div", div_pyapi, + "Call `div` function from OneMKL VM library to perform element " + "by element division of vector `src1` by vector `src2` " + "into resulting vector `dst`", + py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("depends") = py::list()); + + auto div_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src1, + arrayT src2, arrayT dst) { + return vm_ext::need_to_call_binary_ufunc(exec_q, src1, src2, dst, + output_typeid_vector, + contig_dispatch_vector); + }; + m.def("_mkl_div_to_call", div_need_to_call_pyapi, + "Check input arguments to answer if `div` function from " + "OneMKL VM library can be used", + py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), + py::arg("dst")); +} +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/div.hpp b/dpnp/backend/extensions/vm/div.hpp index c13066604840..8095f0bb2cb6 100644 ---
a/dpnp/backend/extensions/vm/div.hpp +++ b/dpnp/backend/extensions/vm/div.hpp @@ -25,58 +25,11 @@ #pragma once -#include +#include -#include "common.hpp" -#include "types_matrix.hpp" +namespace py = pybind11; -namespace dpnp +namespace dpnp::extensions::vm { -namespace backend -{ -namespace ext -{ -namespace vm -{ -template -sycl::event div_contig_impl(sycl::queue exec_q, - const std::int64_t n, - const char *in_a, - const char *in_b, - char *out_y, - const std::vector &depends) -{ - type_utils::validate_type_for_device(exec_q); - - const T *a = reinterpret_cast(in_a); - const T *b = reinterpret_cast(in_b); - using resTy = typename types::DivOutputType::value_type; - resTy *y = reinterpret_cast(out_y); - - return mkl_vm::div(exec_q, - n, // number of elements to be calculated - a, // pointer `a` containing 1st input vector of size n - b, // pointer `b` containing 2nd input vector of size n - y, // pointer `y` to the output vector of size n - depends); -} - -template -struct DivContigFactory -{ - fnT get() - { - if constexpr (std::is_same_v< - typename types::DivOutputType::value_type, void>) - { - return nullptr; - } - else { - return div_contig_impl; - } - } -}; -} // namespace vm -} // namespace ext -} // namespace backend -} // namespace dpnp +void init_div(py::module_ m); +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/exp.cpp b/dpnp/backend/extensions/vm/exp.cpp new file mode 100644 index 000000000000..dba1a71ee1d0 --- /dev/null +++ b/dpnp/backend/extensions/vm/exp.cpp @@ -0,0 +1,139 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#include +#include + +#include "dpctl4pybind11.hpp" + +#include "common.hpp" +#include "exp.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::vm +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; +namespace vm_ext = dpnp::backend::ext::vm; + +namespace impl +{ +// OneMKL namespace with VM functions +namespace mkl_vm = oneapi::mkl::vm; + +/** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::exp function. + * + * @tparam T Type of input vector `a` and of result vector `y`. + */ +template +struct OutputType +{ + using value_type = typename std::disjunction< + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +static sycl::event exp_contig_impl(sycl::queue &exec_q, + std::size_t in_n, + const char *in_a, + char *out_y, + const std::vector &depends) +{ + tu_ns::validate_type_for_device(exec_q); + + std::int64_t n = static_cast(in_n); + const T *a = reinterpret_cast(in_a); + + using resTy = typename OutputType::value_type; + resTy *y = reinterpret_cast(out_y); + + return mkl_vm::exp(exec_q, + n, // number of elements to be calculated + a, // pointer `a` containing input vector of size n + y, // pointer `y` to the output vector of size n + depends); +} + +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +static int output_typeid_vector[td_ns::num_types]; +static unary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types]; + +MACRO_POPULATE_DISPATCH_VECTORS(exp); +} // namespace impl + +void init_exp(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + impl::populate_dispatch_vectors(); + using impl::contig_dispatch_vector; + using impl::output_typeid_vector; + + auto exp_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, + const event_vecT &depends = {}) { + return py_int::py_unary_ufunc( + src, dst, exec_q, depends, output_typeid_vector, + contig_dispatch_vector, + // no support of strided implementation in OneMKL + td_ns::NullPtrVector{}); + }; + m.def("_exp", exp_pyapi, + "Call `exp` function from OneMKL VM library to compute " + "the natural (base-e) exponential of vector elements", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), + py::arg("depends") = py::list()); + + auto exp_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, + arrayT dst) { + return vm_ext::need_to_call_unary_ufunc( + exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); + }; + m.def("_mkl_exp_to_call", exp_need_to_call_pyapi, + "Check input arguments to answer if `exp` function from " + "OneMKL VM library can be used", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); +} +} // namespace dpnp::extensions::vm diff 
--git a/dpnp/backend/extensions/vm/exp.hpp b/dpnp/backend/extensions/vm/exp.hpp index 936b6a5a0ce5..a1d88998fd4f 100644 --- a/dpnp/backend/extensions/vm/exp.hpp +++ b/dpnp/backend/extensions/vm/exp.hpp @@ -25,55 +25,11 @@ #pragma once -#include +#include -#include "common.hpp" -#include "types_matrix.hpp" +namespace py = pybind11; -namespace dpnp +namespace dpnp::extensions::vm { -namespace backend -{ -namespace ext -{ -namespace vm -{ -template -sycl::event exp_contig_impl(sycl::queue exec_q, - const std::int64_t n, - const char *in_a, - char *out_y, - const std::vector &depends) -{ - type_utils::validate_type_for_device(exec_q); - - const T *a = reinterpret_cast(in_a); - using resTy = typename types::ExpOutputType::value_type; - resTy *y = reinterpret_cast(out_y); - - return mkl_vm::exp(exec_q, - n, // number of elements to be calculated - a, // pointer `a` containing input vector of size n - y, // pointer `y` to the output vector of size n - depends); -} - -template -struct ExpContigFactory -{ - fnT get() - { - if constexpr (std::is_same_v< - typename types::ExpOutputType::value_type, void>) - { - return nullptr; - } - else { - return exp_contig_impl; - } - } -}; -} // namespace vm -} // namespace ext -} // namespace backend -} // namespace dpnp +void init_exp(py::module_ m); +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/exp2.cpp b/dpnp/backend/extensions/vm/exp2.cpp new file mode 100644 index 000000000000..3d79e2b28e1b --- /dev/null +++ b/dpnp/backend/extensions/vm/exp2.cpp @@ -0,0 +1,137 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#include +#include + +#include "dpctl4pybind11.hpp" + +#include "common.hpp" +#include "exp2.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::vm +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; +namespace vm_ext = dpnp::backend::ext::vm; + +namespace impl +{ +// OneMKL namespace with VM functions +namespace mkl_vm = oneapi::mkl::vm; + +/** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::exp2 function. + * + * @tparam T Type of input vector `a` and of result vector `y`. + */ +template +struct OutputType +{ + using value_type = + typename std::disjunction, + td_ns::TypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +static sycl::event exp2_contig_impl(sycl::queue &exec_q, + std::size_t in_n, + const char *in_a, + char *out_y, + const std::vector &depends) +{ + tu_ns::validate_type_for_device(exec_q); + + std::int64_t n = static_cast(in_n); + const T *a = reinterpret_cast(in_a); + + using resTy = typename OutputType::value_type; + resTy *y = reinterpret_cast(out_y); + + return mkl_vm::exp2(exec_q, + n, // number of elements to be calculated + a, // pointer `a` containing input vector of size n + y, // pointer `y` to the output vector of size n + depends); +} + +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +static int output_typeid_vector[td_ns::num_types]; +static unary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types]; + +MACRO_POPULATE_DISPATCH_VECTORS(exp2); +} // namespace impl + +void init_exp2(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + impl::populate_dispatch_vectors(); + using impl::contig_dispatch_vector; + using impl::output_typeid_vector; + + auto exp2_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, + const event_vecT &depends = {}) { + return py_int::py_unary_ufunc( + src, dst, exec_q, depends, output_typeid_vector, + contig_dispatch_vector, + // no support of strided implementation in OneMKL + td_ns::NullPtrVector{}); + }; + m.def("_exp2", exp2_pyapi, + "Call `exp2` function from OneMKL VM library to compute " + "the base-2 exponential of vector elements", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), + py::arg("depends") = py::list()); + + auto exp2_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, + arrayT dst) { + return vm_ext::need_to_call_unary_ufunc( + exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); + }; + m.def("_mkl_exp2_to_call", exp2_need_to_call_pyapi, + "Check input arguments to answer if `exp2` function from " + "OneMKL VM library can be used", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); +} +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/exp2.hpp b/dpnp/backend/extensions/vm/exp2.hpp 
index 362897fdbe63..fe0694c5181f 100644 --- a/dpnp/backend/extensions/vm/exp2.hpp +++ b/dpnp/backend/extensions/vm/exp2.hpp @@ -25,55 +25,11 @@ #pragma once -#include +#include -#include "common.hpp" -#include "types_matrix.hpp" +namespace py = pybind11; -namespace dpnp +namespace dpnp::extensions::vm { -namespace backend -{ -namespace ext -{ -namespace vm -{ -template -sycl::event exp2_contig_impl(sycl::queue exec_q, - const std::int64_t n, - const char *in_a, - char *out_y, - const std::vector &depends) -{ - type_utils::validate_type_for_device(exec_q); - - const T *a = reinterpret_cast(in_a); - using resTy = typename types::Exp2OutputType::value_type; - resTy *y = reinterpret_cast(out_y); - - return mkl_vm::exp2(exec_q, - n, // number of elements to be calculated - a, // pointer `a` containing input vector of size n - y, // pointer `y` to the output vector of size n - depends); -} - -template -struct Exp2ContigFactory -{ - fnT get() - { - if constexpr (std::is_same_v< - typename types::Exp2OutputType::value_type, void>) - { - return nullptr; - } - else { - return exp2_contig_impl; - } - } -}; -} // namespace vm -} // namespace ext -} // namespace backend -} // namespace dpnp +void init_exp2(py::module_ m); +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/expm1.cpp b/dpnp/backend/extensions/vm/expm1.cpp new file mode 100644 index 000000000000..535a160f5123 --- /dev/null +++ b/dpnp/backend/extensions/vm/expm1.cpp @@ -0,0 +1,137 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#include +#include + +#include "dpctl4pybind11.hpp" + +#include "common.hpp" +#include "expm1.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::vm +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; +namespace vm_ext = dpnp::backend::ext::vm; + +namespace impl +{ +// OneMKL namespace with VM functions +namespace mkl_vm = oneapi::mkl::vm; + +/** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::expm1 function. + * + * @tparam T Type of input vector `a` and of result vector `y`. + */ +template +struct OutputType +{ + using value_type = + typename std::disjunction, + td_ns::TypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +static sycl::event expm1_contig_impl(sycl::queue &exec_q, + std::size_t in_n, + const char *in_a, + char *out_y, + const std::vector &depends) +{ + tu_ns::validate_type_for_device(exec_q); + + std::int64_t n = static_cast(in_n); + const T *a = reinterpret_cast(in_a); + + using resTy = typename OutputType::value_type; + resTy *y = reinterpret_cast(out_y); + + return mkl_vm::expm1(exec_q, + n, // number of elements to be calculated + a, // pointer `a` containing input vector of size n + y, // pointer `y` to the output vector of size n + depends); +} + +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +static int output_typeid_vector[td_ns::num_types]; +static unary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types]; + +MACRO_POPULATE_DISPATCH_VECTORS(expm1); +} // namespace impl + +void init_expm1(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + impl::populate_dispatch_vectors(); + using impl::contig_dispatch_vector; + using impl::output_typeid_vector; + + auto expm1_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, + const event_vecT &depends = {}) { + return py_int::py_unary_ufunc( + src, dst, exec_q, depends, output_typeid_vector, + contig_dispatch_vector, + // no support of strided implementation in OneMKL + td_ns::NullPtrVector{}); + }; + m.def("_expm1", expm1_pyapi, + "Call `expm1` function from OneMKL VM library to compute " + "the subtraction of 1 from the exponential of vector elements", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), + py::arg("depends") = py::list()); + + auto expm1_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, + arrayT dst) { + return vm_ext::need_to_call_unary_ufunc( + exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); + }; + m.def("_mkl_expm1_to_call", expm1_need_to_call_pyapi, + "Check input arguments to answer if `expm1` function from " + "OneMKL VM library can be used", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); +} +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/expm1.hpp 
b/dpnp/backend/extensions/vm/expm1.hpp index d0a94bca8e9d..7719d4948b44 100644 --- a/dpnp/backend/extensions/vm/expm1.hpp +++ b/dpnp/backend/extensions/vm/expm1.hpp @@ -25,55 +25,11 @@ #pragma once -#include +#include -#include "common.hpp" -#include "types_matrix.hpp" +namespace py = pybind11; -namespace dpnp +namespace dpnp::extensions::vm { -namespace backend -{ -namespace ext -{ -namespace vm -{ -template -sycl::event expm1_contig_impl(sycl::queue exec_q, - const std::int64_t n, - const char *in_a, - char *out_y, - const std::vector &depends) -{ - type_utils::validate_type_for_device(exec_q); - - const T *a = reinterpret_cast(in_a); - using resTy = typename types::Expm1OutputType::value_type; - resTy *y = reinterpret_cast(out_y); - - return mkl_vm::expm1(exec_q, - n, // number of elements to be calculated - a, // pointer `a` containing input vector of size n - y, // pointer `y` to the output vector of size n - depends); -} - -template -struct Expm1ContigFactory -{ - fnT get() - { - if constexpr (std::is_same_v< - typename types::Expm1OutputType::value_type, void>) - { - return nullptr; - } - else { - return expm1_contig_impl; - } - } -}; -} // namespace vm -} // namespace ext -} // namespace backend -} // namespace dpnp +void init_expm1(py::module_ m); +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/floor.cpp b/dpnp/backend/extensions/vm/floor.cpp new file mode 100644 index 000000000000..21d16c05c361 --- /dev/null +++ b/dpnp/backend/extensions/vm/floor.cpp @@ -0,0 +1,137 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#include +#include + +#include "dpctl4pybind11.hpp" + +#include "common.hpp" +#include "floor.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::vm +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; +namespace vm_ext = dpnp::backend::ext::vm; + +namespace impl +{ +// OneMKL namespace with VM functions +namespace mkl_vm = oneapi::mkl::vm; + +/** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::floor function. + * + * @tparam T Type of input vector `a` and of result vector `y`. + */ +template +struct OutputType +{ + using value_type = + typename std::disjunction, + td_ns::TypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +static sycl::event floor_contig_impl(sycl::queue &exec_q, + std::size_t in_n, + const char *in_a, + char *out_y, + const std::vector &depends) +{ + tu_ns::validate_type_for_device(exec_q); + + std::int64_t n = static_cast(in_n); + const T *a = reinterpret_cast(in_a); + + using resTy = typename OutputType::value_type; + resTy *y = reinterpret_cast(out_y); + + return mkl_vm::floor(exec_q, + n, // number of elements to be calculated + a, // pointer `a` containing input vector of size n + y, // pointer `y` to the output vector of size n + depends); +} + +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +static int output_typeid_vector[td_ns::num_types]; +static unary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types]; + +MACRO_POPULATE_DISPATCH_VECTORS(floor); +} // namespace impl + +void init_floor(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + impl::populate_dispatch_vectors(); + using impl::contig_dispatch_vector; + using impl::output_typeid_vector; + + auto floor_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, + const event_vecT &depends = {}) { + return py_int::py_unary_ufunc( + src, dst, exec_q, depends, output_typeid_vector, + contig_dispatch_vector, + // no support of strided implementation in OneMKL + td_ns::NullPtrVector{}); + }; + m.def("_floor", floor_pyapi, + "Call `floor` function from OneMKL VM library to compute " + "the floor of vector elements", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), + py::arg("depends") = py::list()); + + auto floor_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, + arrayT dst) { + return vm_ext::need_to_call_unary_ufunc( + exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); + }; + m.def("_mkl_floor_to_call", floor_need_to_call_pyapi, + "Check input arguments to answer if `floor` function from " + "OneMKL VM library can be used", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); +} +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/floor.hpp b/dpnp/backend/extensions/vm/floor.hpp 
index c138b8b66782..4cc85f2bb897 100644 --- a/dpnp/backend/extensions/vm/floor.hpp +++ b/dpnp/backend/extensions/vm/floor.hpp @@ -25,55 +25,11 @@ #pragma once -#include +#include -#include "common.hpp" -#include "types_matrix.hpp" +namespace py = pybind11; -namespace dpnp +namespace dpnp::extensions::vm { -namespace backend -{ -namespace ext -{ -namespace vm -{ -template -sycl::event floor_contig_impl(sycl::queue exec_q, - const std::int64_t n, - const char *in_a, - char *out_y, - const std::vector &depends) -{ - type_utils::validate_type_for_device(exec_q); - - const T *a = reinterpret_cast(in_a); - using resTy = typename types::FloorOutputType::value_type; - resTy *y = reinterpret_cast(out_y); - - return mkl_vm::floor(exec_q, - n, // number of elements to be calculated - a, // pointer `a` containing input vector of size n - y, // pointer `y` to the output vector of size n - depends); -} - -template -struct FloorContigFactory -{ - fnT get() - { - if constexpr (std::is_same_v< - typename types::FloorOutputType::value_type, void>) - { - return nullptr; - } - else { - return floor_contig_impl; - } - } -}; -} // namespace vm -} // namespace ext -} // namespace backend -} // namespace dpnp +void init_floor(py::module_ m); +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/hypot.cpp b/dpnp/backend/extensions/vm/hypot.cpp new file mode 100644 index 000000000000..5b3a8275c7ef --- /dev/null +++ b/dpnp/backend/extensions/vm/hypot.cpp @@ -0,0 +1,160 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#include +#include + +#include "dpctl4pybind11.hpp" + +#include "common.hpp" +#include "hypot.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::vm +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; +namespace vm_ext = dpnp::backend::ext::vm; + +namespace impl +{ +// OneMKL namespace with VM functions +namespace mkl_vm = oneapi::mkl::vm; + +/** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::hypot function. + * + * @tparam T Type of input vectors `a` and `b` and of result vector `y`. + */ +template +struct OutputType +{ + using value_type = typename std::disjunction< + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +static sycl::event hypot_contig_impl(sycl::queue &exec_q, + std::size_t in_n, + const char *in_a, + ssize_t a_offset, + const char *in_b, + ssize_t b_offset, + char *out_y, + ssize_t out_offset, + const std::vector &depends) +{ + tu_ns::validate_type_for_device(exec_q); + tu_ns::validate_type_for_device(exec_q); + + if ((a_offset != 0) || (b_offset != 0) || (out_offset != 0)) { + throw std::runtime_error("Arrays offsets have to be equals to 0"); + } + + std::int64_t n = static_cast(in_n); + const T1 *a = reinterpret_cast(in_a); + const T2 *b = reinterpret_cast(in_b); + + using resTy = typename OutputType::value_type; + resTy *y = reinterpret_cast(out_y); + + return mkl_vm::hypot(exec_q, + n, // number of elements to be calculated + a, // pointer `a` containing 1st input vector of size n + b, // pointer `b` containing 2nd input vector of size n + y, // pointer `y` to the output vector of size n + depends); +} + +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +static int output_typeid_vector[td_ns::num_types][td_ns::num_types]; +static binary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types] + [td_ns::num_types]; + +MACRO_POPULATE_DISPATCH_TABLES(hypot); +} // namespace impl + +void init_hypot(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + impl::populate_dispatch_tables(); + using impl::contig_dispatch_vector; + using impl::output_typeid_vector; + + auto hypot_pyapi = [&](sycl::queue exec_q, arrayT src1, arrayT src2, + arrayT dst, const event_vecT &depends = {}) { + return py_int::py_binary_ufunc( + src1, src2, dst, exec_q, depends, output_typeid_vector, + contig_dispatch_vector, + // no support of strided implementation in OneMKL + td_ns::NullPtrTable{}, + // no support of C-contig row with broadcasting in OneMKL + td_ns::NullPtrTable< + impl:: + 
binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + td_ns::NullPtrTable< + impl:: + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_hypot", hypot_pyapi, + "Call `hypot` function from OneMKL VM library to compute " + "the element-wise square root of the sum of squares", + py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("depends") = py::list()); + + auto hypot_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src1, + arrayT src2, arrayT dst) { + return vm_ext::need_to_call_binary_ufunc(exec_q, src1, src2, dst, + output_typeid_vector, + contig_dispatch_vector); + }; + m.def("_mkl_hypot_to_call", hypot_need_to_call_pyapi, + "Check input arguments to answer if `hypot` function from " + "OneMKL VM library can be used", + py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), + py::arg("dst")); +} +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/hypot.hpp b/dpnp/backend/extensions/vm/hypot.hpp index 19dd4345c36f..f7a171556d09 100644 --- a/dpnp/backend/extensions/vm/hypot.hpp +++ b/dpnp/backend/extensions/vm/hypot.hpp @@ -25,58 +25,11 @@ #pragma once -#include +#include -#include "common.hpp" -#include "types_matrix.hpp" +namespace py = pybind11; -namespace dpnp +namespace dpnp::extensions::vm { -namespace backend -{ -namespace ext -{ -namespace vm -{ -template -sycl::event hypot_contig_impl(sycl::queue exec_q, - const std::int64_t n, - const char *in_a, - const char *in_b, - char *out_y, - const std::vector &depends) -{ - type_utils::validate_type_for_device(exec_q); - - const T *a = reinterpret_cast(in_a); - const T *b = reinterpret_cast(in_b); - using resTy = typename types::HypotOutputType::value_type; - resTy *y = reinterpret_cast(out_y); - - return mkl_vm::hypot(exec_q, - n, // number of elements to be calculated - a, // pointer `a` containing 1st input vector of size n - b, // pointer `b` containing 2nd input vector of size n - y, // pointer `y` to the output vector of size n - depends); -} - -template -struct HypotContigFactory -{ - fnT get() - { - if constexpr (std::is_same_v< - typename types::HypotOutputType::value_type, void>) - { - return nullptr; - } - else { - return hypot_contig_impl; - } - } -}; -} // namespace vm -} // namespace ext -} // namespace backend -} // namespace dpnp +void init_hypot(py::module_ m); +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/types_matrix.hpp b/dpnp/backend/extensions/vm/types_matrix.hpp index e8c9d0d574b2..8c7c0217ec22 100644 --- a/dpnp/backend/extensions/vm/types_matrix.hpp +++ b/dpnp/backend/extensions/vm/types_matrix.hpp @@ -43,108 +43,6 @@ namespace vm { namespace types { -/** - * @brief A factory to define pairs of supported types for which - * MKL VM library provides support in oneapi::mkl::vm::div function. - * - * @tparam T Type of input vectors `a` and `b` and of result vector `y`. - */ -template -struct DivOutputType -{ - using value_type = typename std::disjunction< - dpctl_td_ns::BinaryTypeMapResultEntry, - T, - std::complex, - std::complex>, - dpctl_td_ns::BinaryTypeMapResultEntry, - T, - std::complex, - std::complex>, - dpctl_td_ns::BinaryTypeMapResultEntry, - dpctl_td_ns::BinaryTypeMapResultEntry, - dpctl_td_ns::DefaultResultEntry>::result_type; -}; - -/** - * @brief A factory to define pairs of supported types for which - * MKL VM library provides support in oneapi::mkl::vm::exp function. - * - * @tparam T Type of input vector `a` and of result vector `y`.
- */ -template -struct ExpOutputType -{ - using value_type = typename std::disjunction< - dpctl_td_ns::TypeMapResultEntry>, - dpctl_td_ns::TypeMapResultEntry>, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::DefaultResultEntry>::result_type; -}; - -/** - * @brief A factory to define pairs of supported types for which - * MKL VM library provides support in oneapi::mkl::vm::exp2 function. - * - * @tparam T Type of input vector `a` and of result vector `y`. - */ -template -struct Exp2OutputType -{ - using value_type = typename std::disjunction< - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::DefaultResultEntry>::result_type; -}; - -/** - * @brief A factory to define pairs of supported types for which - * MKL VM library provides support in oneapi::mkl::vm::expm1 function. - * - * @tparam T Type of input vector `a` and of result vector `y`. - */ -template -struct Expm1OutputType -{ - using value_type = typename std::disjunction< - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::DefaultResultEntry>::result_type; -}; - -/** - * @brief A factory to define pairs of supported types for which - * MKL VM library provides support in oneapi::mkl::vm::floor function. - * - * @tparam T Type of input vector `a` and of result vector `y`. - */ -template -struct FloorOutputType -{ - using value_type = typename std::disjunction< - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::DefaultResultEntry>::result_type; -}; - -/** - * @brief A factory to define pairs of supported types for which - * MKL VM library provides support in oneapi::mkl::vm::hypot function. - * - * @tparam T Type of input vectors `a` and `b` and of result vector `y`. - */ -template -struct HypotOutputType -{ - using value_type = typename std::disjunction< - dpctl_td_ns::BinaryTypeMapResultEntry, - dpctl_td_ns::BinaryTypeMapResultEntry, - dpctl_td_ns::DefaultResultEntry>::result_type; -}; - /** * @brief A factory to define pairs of supported types for which * MKL VM library provides support in oneapi::mkl::vm::ln function. 
diff --git a/dpnp/backend/extensions/vm/vm_py.cpp b/dpnp/backend/extensions/vm/vm_py.cpp index fdd972b61d3b..6ca46c6f91ff 100644 --- a/dpnp/backend/extensions/vm/vm_py.cpp +++ b/dpnp/backend/extensions/vm/vm_py.cpp @@ -75,12 +75,6 @@ namespace vm_ns = dpnp::extensions::vm; using vm_ext::binary_impl_fn_ptr_t; using vm_ext::unary_impl_fn_ptr_t; -static binary_impl_fn_ptr_t div_dispatch_vector[dpctl_td_ns::num_types]; -static unary_impl_fn_ptr_t exp_dispatch_vector[dpctl_td_ns::num_types]; -static unary_impl_fn_ptr_t exp2_dispatch_vector[dpctl_td_ns::num_types]; -static unary_impl_fn_ptr_t expm1_dispatch_vector[dpctl_td_ns::num_types]; -static unary_impl_fn_ptr_t floor_dispatch_vector[dpctl_td_ns::num_types]; -static binary_impl_fn_ptr_t hypot_dispatch_vector[dpctl_td_ns::num_types]; static unary_impl_fn_ptr_t ln_dispatch_vector[dpctl_td_ns::num_types]; static unary_impl_fn_ptr_t log10_dispatch_vector[dpctl_td_ns::num_types]; static unary_impl_fn_ptr_t log1p_dispatch_vector[dpctl_td_ns::num_types]; @@ -116,177 +110,12 @@ PYBIND11_MODULE(_vm_impl, m) vm_ns::init_conj(m); vm_ns::init_cos(m); vm_ns::init_cosh(m); - - // BinaryUfunc: ==== Div(x1, x2) ==== - { - vm_ext::init_ufunc_dispatch_vector( - div_dispatch_vector); - - auto div_pyapi = [&](sycl::queue exec_q, arrayT src1, arrayT src2, - arrayT dst, const event_vecT &depends = {}) { - return vm_ext::binary_ufunc(exec_q, src1, src2, dst, depends, - div_dispatch_vector); - }; - m.def("_div", div_pyapi, - "Call `div` function from OneMKL VM library to performs element " - "by element division of vector `src1` by vector `src2` " - "to resulting vector `dst`", - py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("depends") = py::list()); - - auto div_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src1, - arrayT src2, arrayT dst) { - return vm_ext::need_to_call_binary_ufunc(exec_q, src1, src2, dst, - div_dispatch_vector); - }; - m.def("_mkl_div_to_call", div_need_to_call_pyapi, - "Check input arguments to answer if `div` function from " - "OneMKL VM library can be used", - py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), - py::arg("dst")); - } - - // UnaryUfunc: ==== Exp(x) ==== - { - vm_ext::init_ufunc_dispatch_vector( - exp_dispatch_vector); - - auto exp_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { - return vm_ext::unary_ufunc(exec_q, src, dst, depends, - exp_dispatch_vector); - }; - m.def("_exp", exp_pyapi, - "Call `exp` function from OneMKL VM library to compute " - "natural (base-e) exponential of vector elements", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), - py::arg("depends") = py::list()); - - auto exp_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { - return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, - exp_dispatch_vector); - }; - m.def("_mkl_exp_to_call", exp_need_to_call_pyapi, - "Check input arguments to answer if `exp` function from " - "OneMKL VM library can be used", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); - } - - // UnaryUfunc: ==== exp2(x) ==== - { - vm_ext::init_ufunc_dispatch_vector( - exp2_dispatch_vector); - - auto exp2_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { - return vm_ext::unary_ufunc(exec_q, src, dst, depends, - exp2_dispatch_vector); - }; - m.def("_exp2", exp2_pyapi, - "Call `exp2` function from OneMKL VM library to compute " - "the element-wise base-2 exponential of vector elements", - py::arg("sycl_queue"), 
py::arg("src"), py::arg("dst"), - py::arg("depends") = py::list()); - - auto exp2_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { - return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, - exp2_dispatch_vector); - }; - m.def("_mkl_exp2_to_call", exp2_need_to_call_pyapi, - "Check input arguments to answer if `exp2` function from " - "OneMKL VM library can be used", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); - } - - // UnaryUfunc: ==== expm1(x) ==== - { - vm_ext::init_ufunc_dispatch_vector( - expm1_dispatch_vector); - - auto expm1_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { - return vm_ext::unary_ufunc(exec_q, src, dst, depends, - expm1_dispatch_vector); - }; - m.def("_expm1", expm1_pyapi, - "Call `expm1` function from OneMKL VM library to compute " - "subtraction of 1 from the exponential of vector elements", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), - py::arg("depends") = py::list()); - - auto expm1_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { - return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, - expm1_dispatch_vector); - }; - m.def("_mkl_expm1_to_call", expm1_need_to_call_pyapi, - "Check input arguments to answer if `expm1` function from " - "OneMKL VM library can be used", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); - } - - // UnaryUfunc: ==== Floor(x) ==== - { - vm_ext::init_ufunc_dispatch_vector( - floor_dispatch_vector); - - auto floor_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { - return vm_ext::unary_ufunc(exec_q, src, dst, depends, - floor_dispatch_vector); - }; - m.def("_floor", floor_pyapi, - "Call `floor` function from OneMKL VM library to compute " - "floor of vector elements", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), - py::arg("depends") = py::list()); - - auto floor_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { - return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, - floor_dispatch_vector); - }; - m.def("_mkl_floor_to_call", floor_need_to_call_pyapi, - "Check input arguments to answer if `floor` function from " - "OneMKL VM library can be used", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); - } - - // BinaryUfunc: ==== Hypot(x1, x2) ==== - { - vm_ext::init_ufunc_dispatch_vector( - hypot_dispatch_vector); - - auto hypot_pyapi = [&](sycl::queue exec_q, arrayT src1, arrayT src2, - arrayT dst, const event_vecT &depends = {}) { - return vm_ext::binary_ufunc(exec_q, src1, src2, dst, depends, - hypot_dispatch_vector); - }; - m.def("_hypot", hypot_pyapi, - "Call `hypot` function from OneMKL VM library to compute element " - "by element hypotenuse of `x`", - py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("depends") = py::list()); - - auto hypot_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src1, - arrayT src2, arrayT dst) { - return vm_ext::need_to_call_binary_ufunc(exec_q, src1, src2, dst, - hypot_dispatch_vector); - }; - m.def("_mkl_hypot_to_call", hypot_need_to_call_pyapi, - "Check input arguments to answer if `hypot` function from " - "OneMKL VM library can be used", - py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), - py::arg("dst")); - } + vm_ns::init_div(m); + vm_ns::init_exp(m); + vm_ns::init_exp2(m); + vm_ns::init_expm1(m); + vm_ns::init_floor(m); + vm_ns::init_hypot(m); // UnaryUfunc: ==== Ln(x) ==== { From 616186e7defce143cb542119a702cedc9f6a79e8 Mon Sep 17 00:00:00 
2001 From: Anton Volkov Date: Fri, 7 Jun 2024 16:54:25 +0200 Subject: [PATCH 16/35] Separated implementation of ln, log1p, log2 and log10 functions --- dpnp/backend/extensions/vm/CMakeLists.txt | 4 + dpnp/backend/extensions/vm/acos.cpp | 2 +- dpnp/backend/extensions/vm/acosh.cpp | 2 +- dpnp/backend/extensions/vm/asin.cpp | 2 +- dpnp/backend/extensions/vm/asinh.cpp | 2 +- dpnp/backend/extensions/vm/atan.cpp | 2 +- dpnp/backend/extensions/vm/atanh.cpp | 2 +- dpnp/backend/extensions/vm/ceil.cpp | 2 +- dpnp/backend/extensions/vm/conj.cpp | 2 +- dpnp/backend/extensions/vm/cos.cpp | 2 +- dpnp/backend/extensions/vm/ln.cpp | 139 ++++++++++++++++++++ dpnp/backend/extensions/vm/ln.hpp | 53 +------- dpnp/backend/extensions/vm/log10.cpp | 139 ++++++++++++++++++++ dpnp/backend/extensions/vm/log10.hpp | 54 +------- dpnp/backend/extensions/vm/log1p.cpp | 137 +++++++++++++++++++ dpnp/backend/extensions/vm/log1p.hpp | 54 +------- dpnp/backend/extensions/vm/log2.cpp | 137 +++++++++++++++++++ dpnp/backend/extensions/vm/log2.hpp | 54 +------- dpnp/backend/extensions/vm/types_matrix.hpp | 64 --------- dpnp/backend/extensions/vm/vm_py.cpp | 120 +---------------- 20 files changed, 589 insertions(+), 384 deletions(-) create mode 100644 dpnp/backend/extensions/vm/ln.cpp create mode 100644 dpnp/backend/extensions/vm/log10.cpp create mode 100644 dpnp/backend/extensions/vm/log1p.cpp create mode 100644 dpnp/backend/extensions/vm/log2.cpp diff --git a/dpnp/backend/extensions/vm/CMakeLists.txt b/dpnp/backend/extensions/vm/CMakeLists.txt index 9de3c7908088..37415c6c57d7 100644 --- a/dpnp/backend/extensions/vm/CMakeLists.txt +++ b/dpnp/backend/extensions/vm/CMakeLists.txt @@ -44,6 +44,10 @@ set(_elementwise_sources ${CMAKE_CURRENT_SOURCE_DIR}/expm1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/floor.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hypot.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/ln.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/log10.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/log1p.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/log2.cpp ) set(_module_src diff --git a/dpnp/backend/extensions/vm/acos.cpp b/dpnp/backend/extensions/vm/acos.cpp index f1c73ed000d7..3e99ddd38ab8 100644 --- a/dpnp/backend/extensions/vm/acos.cpp +++ b/dpnp/backend/extensions/vm/acos.cpp @@ -122,7 +122,7 @@ void init_acos(py::module_ m) }; m.def("_acos", acos_pyapi, "Call `acos` function from OneMKL VM library to compute " - "inverse cosine of vector elements", + "the inverse cosine of vector elements", py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), py::arg("depends") = py::list()); diff --git a/dpnp/backend/extensions/vm/acosh.cpp b/dpnp/backend/extensions/vm/acosh.cpp index 486ea5fdcdc3..626f6eb17b6c 100644 --- a/dpnp/backend/extensions/vm/acosh.cpp +++ b/dpnp/backend/extensions/vm/acosh.cpp @@ -122,7 +122,7 @@ void init_acosh(py::module_ m) }; m.def("_acosh", acosh_pyapi, "Call `acosh` function from OneMKL VM library to compute " - "inverse hyperbolic cosine of vector elements", + "the inverse hyperbolic cosine of vector elements", py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), py::arg("depends") = py::list()); diff --git a/dpnp/backend/extensions/vm/asin.cpp b/dpnp/backend/extensions/vm/asin.cpp index d19677242cc4..b00b9a55ad42 100644 --- a/dpnp/backend/extensions/vm/asin.cpp +++ b/dpnp/backend/extensions/vm/asin.cpp @@ -122,7 +122,7 @@ void init_asin(py::module_ m) }; m.def("_asin", asin_pyapi, "Call `asin` function from OneMKL VM library to compute " - "inverse sine of vector elements", + "the inverse sine of vector elements", py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), 
py::arg("depends") = py::list()); diff --git a/dpnp/backend/extensions/vm/asinh.cpp b/dpnp/backend/extensions/vm/asinh.cpp index c5b55d5a6380..a21cf0d78bde 100644 --- a/dpnp/backend/extensions/vm/asinh.cpp +++ b/dpnp/backend/extensions/vm/asinh.cpp @@ -122,7 +122,7 @@ void init_asinh(py::module_ m) }; m.def("_asinh", asinh_pyapi, "Call `asinh` function from OneMKL VM library to compute " - "inverse hyperbolic sine of vector elements", + "the inverse hyperbolic sine of vector elements", py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), py::arg("depends") = py::list()); diff --git a/dpnp/backend/extensions/vm/atan.cpp b/dpnp/backend/extensions/vm/atan.cpp index 4cbbfa92e39e..cdcf437fcb06 100644 --- a/dpnp/backend/extensions/vm/atan.cpp +++ b/dpnp/backend/extensions/vm/atan.cpp @@ -122,7 +122,7 @@ void init_atan(py::module_ m) }; m.def("_atan", atan_pyapi, "Call `atan` function from OneMKL VM library to compute " - "inverse tangent of vector elements", + "the inverse tangent of vector elements", py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), py::arg("depends") = py::list()); diff --git a/dpnp/backend/extensions/vm/atanh.cpp b/dpnp/backend/extensions/vm/atanh.cpp index 0278a31e1bf2..9ab2b56332a6 100644 --- a/dpnp/backend/extensions/vm/atanh.cpp +++ b/dpnp/backend/extensions/vm/atanh.cpp @@ -122,7 +122,7 @@ void init_atanh(py::module_ m) }; m.def("_atanh", atanh_pyapi, "Call `atanh` function from OneMKL VM library to compute " - "inverse hyperbolic tangent of vector elements", + "the inverse hyperbolic tangent of vector elements", py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), py::arg("depends") = py::list()); diff --git a/dpnp/backend/extensions/vm/ceil.cpp b/dpnp/backend/extensions/vm/ceil.cpp index 3feb4b8ae352..e4e7136b764f 100644 --- a/dpnp/backend/extensions/vm/ceil.cpp +++ b/dpnp/backend/extensions/vm/ceil.cpp @@ -120,7 +120,7 @@ void init_ceil(py::module_ m) }; m.def("_ceil", ceil_pyapi, "Call `ceil` function from OneMKL VM library to compute " - "ceiling of vector elements", + "the ceiling of vector elements", py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), py::arg("depends") = py::list()); diff --git a/dpnp/backend/extensions/vm/conj.cpp b/dpnp/backend/extensions/vm/conj.cpp index 81ae93834ca6..6daabd71b72a 100644 --- a/dpnp/backend/extensions/vm/conj.cpp +++ b/dpnp/backend/extensions/vm/conj.cpp @@ -120,7 +120,7 @@ void init_conj(py::module_ m) }; m.def("_conj", conj_pyapi, "Call `conj` function from OneMKL VM library to compute " - "conjugate of vector elements", + "the conjugate of vector elements", py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), py::arg("depends") = py::list()); diff --git a/dpnp/backend/extensions/vm/cos.cpp b/dpnp/backend/extensions/vm/cos.cpp index bf2f564414bd..c4e9e1208990 100644 --- a/dpnp/backend/extensions/vm/cos.cpp +++ b/dpnp/backend/extensions/vm/cos.cpp @@ -122,7 +122,7 @@ void init_cos(py::module_ m) }; m.def("_cos", cos_pyapi, "Call `cos` function from OneMKL VM library to compute " - "cosine of vector elements", + "the cosine of vector elements", py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), py::arg("depends") = py::list()); diff --git a/dpnp/backend/extensions/vm/ln.cpp b/dpnp/backend/extensions/vm/ln.cpp new file mode 100644 index 000000000000..3d4d2997986a --- /dev/null +++ b/dpnp/backend/extensions/vm/ln.cpp @@ -0,0 +1,139 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include +#include + +#include "dpctl4pybind11.hpp" + +#include "common.hpp" +#include "ln.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::vm +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; +namespace vm_ext = dpnp::backend::ext::vm; + +namespace impl +{ +// OneMKL namespace with VM functions +namespace mkl_vm = oneapi::mkl::vm; + +/** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::ln function. + * + * @tparam T Type of input vector `a` and of result vector `y`. 
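+ *
+ * For example, this mapping resolves `OutputType<float>::value_type` to
+ * `float`, while a type with no entry below (e.g. `std::int32_t`) resolves to
+ * `DefaultResultEntry<void>` and is treated as unsupported by the VM
+ * implementation, i.e.
+ *   static_assert(std::is_same_v<OutputType<float>::value_type, float>);
+ *   static_assert(std::is_same_v<OutputType<std::int32_t>::value_type, void>);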
+ */ +template +struct OutputType +{ + using value_type = typename std::disjunction< + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +static sycl::event ln_contig_impl(sycl::queue &exec_q, + std::size_t in_n, + const char *in_a, + char *out_y, + const std::vector &depends) +{ + tu_ns::validate_type_for_device(exec_q); + + std::int64_t n = static_cast(in_n); + const T *a = reinterpret_cast(in_a); + + using resTy = typename OutputType::value_type; + resTy *y = reinterpret_cast(out_y); + + return mkl_vm::ln(exec_q, + n, // number of elements to be calculated + a, // pointer `a` containing input vector of size n + y, // pointer `y` to the output vector of size n + depends); +} + +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +static int output_typeid_vector[td_ns::num_types]; +static unary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types]; + +MACRO_POPULATE_DISPATCH_VECTORS(ln); +} // namespace impl + +void init_ln(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + impl::populate_dispatch_vectors(); + using impl::contig_dispatch_vector; + using impl::output_typeid_vector; + + auto ln_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, + const event_vecT &depends = {}) { + return py_int::py_unary_ufunc( + src, dst, exec_q, depends, output_typeid_vector, + contig_dispatch_vector, + // no support of strided implementation in OneMKL + td_ns::NullPtrVector{}); + }; + m.def("_ln", ln_pyapi, + "Call `ln` function from OneMKL VM library to compute " + "the natural logarithm of vector elements", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), + py::arg("depends") = py::list()); + + auto ln_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, + arrayT dst) { + return vm_ext::need_to_call_unary_ufunc( + exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); + }; + m.def("_mkl_ln_to_call", ln_need_to_call_pyapi, + "Check input arguments to answer if `ln` function from " + "OneMKL VM library can be used", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); +} +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/ln.hpp b/dpnp/backend/extensions/vm/ln.hpp index 574cc8fa33c9..7dadf76b2fdb 100644 --- a/dpnp/backend/extensions/vm/ln.hpp +++ b/dpnp/backend/extensions/vm/ln.hpp @@ -25,54 +25,11 @@ #pragma once -#include +#include -#include "common.hpp" -#include "types_matrix.hpp" +namespace py = pybind11; -namespace dpnp +namespace dpnp::extensions::vm { -namespace backend -{ -namespace ext -{ -namespace vm -{ -template -sycl::event ln_contig_impl(sycl::queue exec_q, - const std::int64_t n, - const char *in_a, - char *out_y, - const std::vector &depends) -{ - type_utils::validate_type_for_device(exec_q); - - const T *a = reinterpret_cast(in_a); - using resTy = typename types::LnOutputType::value_type; - resTy *y = reinterpret_cast(out_y); - - return mkl_vm::ln(exec_q, - n, // number of elements to be calculated - a, // pointer `a` containing input vector of size n - y, // pointer `y` to the output vector of size n - depends); -} - -template -struct LnContigFactory -{ - fnT get() - { - if constexpr (std::is_same_v< - typename types::LnOutputType::value_type, void>) { - return nullptr; - } - else { - return ln_contig_impl; - } - } -}; -} // namespace vm -} // namespace ext -} // namespace backend -} // namespace dpnp +void 
init_ln(py::module_ m); +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/log10.cpp b/dpnp/backend/extensions/vm/log10.cpp new file mode 100644 index 000000000000..c95c64bcc470 --- /dev/null +++ b/dpnp/backend/extensions/vm/log10.cpp @@ -0,0 +1,139 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include +#include + +#include "dpctl4pybind11.hpp" + +#include "common.hpp" +#include "log10.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::vm +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; +namespace vm_ext = dpnp::backend::ext::vm; + +namespace impl +{ +// OneMKL namespace with VM functions +namespace mkl_vm = oneapi::mkl::vm; + +/** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::log10 function. + * + * @tparam T Type of input vector `a` and of result vector `y`. 
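+ *
+ * The resolved `value_type` is used below as `resTy`, the element type of the
+ * output vector passed to `oneapi::mkl::vm::log10`.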
+ */ +template +struct OutputType +{ + using value_type = typename std::disjunction< + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +static sycl::event log10_contig_impl(sycl::queue &exec_q, + std::size_t in_n, + const char *in_a, + char *out_y, + const std::vector &depends) +{ + tu_ns::validate_type_for_device(exec_q); + + std::int64_t n = static_cast(in_n); + const T *a = reinterpret_cast(in_a); + + using resTy = typename OutputType::value_type; + resTy *y = reinterpret_cast(out_y); + + return mkl_vm::log10(exec_q, + n, // number of elements to be calculated + a, // pointer `a` containing input vector of size n + y, // pointer `y` to the output vector of size n + depends); +} + +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +static int output_typeid_vector[td_ns::num_types]; +static unary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types]; + +MACRO_POPULATE_DISPATCH_VECTORS(log10); +} // namespace impl + +void init_log10(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + impl::populate_dispatch_vectors(); + using impl::contig_dispatch_vector; + using impl::output_typeid_vector; + + auto log10_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, + const event_vecT &depends = {}) { + return py_int::py_unary_ufunc( + src, dst, exec_q, depends, output_typeid_vector, + contig_dispatch_vector, + // no support of strided implementation in OneMKL + td_ns::NullPtrVector{}); + }; + m.def("_log10", log10_pyapi, + "Call `log10` function from OneMKL VM library to compute " + "the base-10 logarithm of vector elements", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), + py::arg("depends") = py::list()); + + auto log10_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, + arrayT dst) { + return vm_ext::need_to_call_unary_ufunc( + exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); + }; + m.def("_mkl_log10_to_call", log10_need_to_call_pyapi, + "Check input arguments to answer if `log10` function from " + "OneMKL VM library can be used", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); +} +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/log10.hpp b/dpnp/backend/extensions/vm/log10.hpp index dc030817cdaa..c62ae122d356 100644 --- a/dpnp/backend/extensions/vm/log10.hpp +++ b/dpnp/backend/extensions/vm/log10.hpp @@ -25,55 +25,11 @@ #pragma once -#include +#include -#include "common.hpp" -#include "types_matrix.hpp" +namespace py = pybind11; -namespace dpnp +namespace dpnp::extensions::vm { -namespace backend -{ -namespace ext -{ -namespace vm -{ -template -sycl::event log10_contig_impl(sycl::queue exec_q, - const std::int64_t n, - const char *in_a, - char *out_y, - const std::vector &depends) -{ - type_utils::validate_type_for_device(exec_q); - - const T *a = reinterpret_cast(in_a); - using resTy = typename types::Log10OutputType::value_type; - resTy *y = reinterpret_cast(out_y); - - return mkl_vm::log10(exec_q, - n, // number of elements to be calculated - a, // pointer `a` containing input vector of size n - y, // pointer `y` to the output vector of size n - depends); -} - -template -struct Log10ContigFactory -{ - fnT get() - { - if constexpr (std::is_same_v< - typename types::Log10OutputType::value_type, void>) - { - return nullptr; - } - else { - return log10_contig_impl; - } - } -}; -} // namespace vm -} 
// namespace ext -} // namespace backend -} // namespace dpnp +void init_log10(py::module_ m); +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/log1p.cpp b/dpnp/backend/extensions/vm/log1p.cpp new file mode 100644 index 000000000000..a1bdb3bb0258 --- /dev/null +++ b/dpnp/backend/extensions/vm/log1p.cpp @@ -0,0 +1,137 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include +#include + +#include "dpctl4pybind11.hpp" + +#include "common.hpp" +#include "log1p.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::vm +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; +namespace vm_ext = dpnp::backend::ext::vm; + +namespace impl +{ +// OneMKL namespace with VM functions +namespace mkl_vm = oneapi::mkl::vm; + +/** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::log1p function. + * + * @tparam T Type of input vector `a` and of result vector `y`. 
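+ *
+ * Unlike `ln` and `log10`, only real floating-point types are listed here;
+ * any other type resolves to `DefaultResultEntry<void>` and is treated as
+ * unsupported by the VM implementation.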
+ */ +template +struct OutputType +{ + using value_type = + typename std::disjunction, + td_ns::TypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +static sycl::event log1p_contig_impl(sycl::queue &exec_q, + std::size_t in_n, + const char *in_a, + char *out_y, + const std::vector &depends) +{ + tu_ns::validate_type_for_device(exec_q); + + std::int64_t n = static_cast(in_n); + const T *a = reinterpret_cast(in_a); + + using resTy = typename OutputType::value_type; + resTy *y = reinterpret_cast(out_y); + + return mkl_vm::log1p(exec_q, + n, // number of elements to be calculated + a, // pointer `a` containing input vector of size n + y, // pointer `y` to the output vector of size n + depends); +} + +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +static int output_typeid_vector[td_ns::num_types]; +static unary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types]; + +MACRO_POPULATE_DISPATCH_VECTORS(log1p); +} // namespace impl + +void init_log1p(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + impl::populate_dispatch_vectors(); + using impl::contig_dispatch_vector; + using impl::output_typeid_vector; + + auto log1p_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, + const event_vecT &depends = {}) { + return py_int::py_unary_ufunc( + src, dst, exec_q, depends, output_typeid_vector, + contig_dispatch_vector, + // no support of strided implementation in OneMKL + td_ns::NullPtrVector{}); + }; + m.def("_log1p", log1p_pyapi, + "Call `log1p` function from OneMKL VM library to compute " + "the natural logarithm of 1 plus vector elements", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), + py::arg("depends") = py::list()); + + auto log1p_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, + arrayT dst) { + return vm_ext::need_to_call_unary_ufunc( + exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); + }; + m.def("_mkl_log1p_to_call", log1p_need_to_call_pyapi, + "Check input arguments to answer if `log1p` function from " + "OneMKL VM library can be used", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); +} +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/log1p.hpp b/dpnp/backend/extensions/vm/log1p.hpp index 39ab1b3a21cb..7cbfb1fe1873 100644 --- a/dpnp/backend/extensions/vm/log1p.hpp +++ b/dpnp/backend/extensions/vm/log1p.hpp @@ -25,55 +25,11 @@ #pragma once -#include +#include -#include "common.hpp" -#include "types_matrix.hpp" +namespace py = pybind11; -namespace dpnp +namespace dpnp::extensions::vm { -namespace backend -{ -namespace ext -{ -namespace vm -{ -template -sycl::event log1p_contig_impl(sycl::queue exec_q, - const std::int64_t n, - const char *in_a, - char *out_y, - const std::vector &depends) -{ - type_utils::validate_type_for_device(exec_q); - - const T *a = reinterpret_cast(in_a); - using resTy = typename types::Log1pOutputType::value_type; - resTy *y = reinterpret_cast(out_y); - - return mkl_vm::log1p(exec_q, - n, // number of elements to be calculated - a, // pointer `a` containing input vector of size n - y, // pointer `y` to the output vector of size n - depends); -} - -template -struct Log1pContigFactory -{ - fnT get() - { - if constexpr (std::is_same_v< - typename types::Log1pOutputType::value_type, void>) - { - return nullptr; - } - else { - return log1p_contig_impl; - } - } -}; -} // namespace vm -} // namespace ext -} // namespace backend -} // namespace dpnp +void 
init_log1p(py::module_ m); +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/log2.cpp b/dpnp/backend/extensions/vm/log2.cpp new file mode 100644 index 000000000000..54021c8b0919 --- /dev/null +++ b/dpnp/backend/extensions/vm/log2.cpp @@ -0,0 +1,137 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include +#include + +#include "dpctl4pybind11.hpp" + +#include "common.hpp" +#include "log2.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::vm +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; +namespace vm_ext = dpnp::backend::ext::vm; + +namespace impl +{ +// OneMKL namespace with VM functions +namespace mkl_vm = oneapi::mkl::vm; + +/** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::log2 function. + * + * @tparam T Type of input vector `a` and of result vector `y`. 
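+ *
+ * The supported set matches `log1p` above: real floating-point types only.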
+ */ +template +struct OutputType +{ + using value_type = + typename std::disjunction, + td_ns::TypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +static sycl::event log2_contig_impl(sycl::queue &exec_q, + std::size_t in_n, + const char *in_a, + char *out_y, + const std::vector &depends) +{ + tu_ns::validate_type_for_device(exec_q); + + std::int64_t n = static_cast(in_n); + const T *a = reinterpret_cast(in_a); + + using resTy = typename OutputType::value_type; + resTy *y = reinterpret_cast(out_y); + + return mkl_vm::log2(exec_q, + n, // number of elements to be calculated + a, // pointer `a` containing input vector of size n + y, // pointer `y` to the output vector of size n + depends); +} + +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +static int output_typeid_vector[td_ns::num_types]; +static unary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types]; + +MACRO_POPULATE_DISPATCH_VECTORS(log2); +} // namespace impl + +void init_log2(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + impl::populate_dispatch_vectors(); + using impl::contig_dispatch_vector; + using impl::output_typeid_vector; + + auto log2_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, + const event_vecT &depends = {}) { + return py_int::py_unary_ufunc( + src, dst, exec_q, depends, output_typeid_vector, + contig_dispatch_vector, + // no support of strided implementation in OneMKL + td_ns::NullPtrVector{}); + }; + m.def("_log2", log2_pyapi, + "Call `log2` function from OneMKL VM library to compute " + "the base-2 logarithm of vector elements", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), + py::arg("depends") = py::list()); + + auto log2_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, + arrayT dst) { + return vm_ext::need_to_call_unary_ufunc( + exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); + }; + m.def("_mkl_log2_to_call", log2_need_to_call_pyapi, + "Check input arguments to answer if `log2` function from " + "OneMKL VM library can be used", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); +} +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/log2.hpp b/dpnp/backend/extensions/vm/log2.hpp index 2c419ac8ab26..34dd1a92136e 100644 --- a/dpnp/backend/extensions/vm/log2.hpp +++ b/dpnp/backend/extensions/vm/log2.hpp @@ -25,55 +25,11 @@ #pragma once -#include +#include -#include "common.hpp" -#include "types_matrix.hpp" +namespace py = pybind11; -namespace dpnp +namespace dpnp::extensions::vm { -namespace backend -{ -namespace ext -{ -namespace vm -{ -template -sycl::event log2_contig_impl(sycl::queue exec_q, - const std::int64_t n, - const char *in_a, - char *out_y, - const std::vector &depends) -{ - type_utils::validate_type_for_device(exec_q); - - const T *a = reinterpret_cast(in_a); - using resTy = typename types::Log2OutputType::value_type; - resTy *y = reinterpret_cast(out_y); - - return mkl_vm::log2(exec_q, - n, // number of elements to be calculated - a, // pointer `a` containing input vector of size n - y, // pointer `y` to the output vector of size n - depends); -} - -template -struct Log2ContigFactory -{ - fnT get() - { - if constexpr (std::is_same_v< - typename types::Log2OutputType::value_type, void>) - { - return nullptr; - } - else { - return log2_contig_impl; - } - } -}; -} // namespace vm -} // namespace ext -} // namespace backend -} // namespace dpnp +void init_log2(py::module_ m); +} // namespace 
dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/types_matrix.hpp b/dpnp/backend/extensions/vm/types_matrix.hpp index 8c7c0217ec22..ff537ef4e18f 100644 --- a/dpnp/backend/extensions/vm/types_matrix.hpp +++ b/dpnp/backend/extensions/vm/types_matrix.hpp @@ -43,70 +43,6 @@ namespace vm { namespace types { -/** - * @brief A factory to define pairs of supported types for which - * MKL VM library provides support in oneapi::mkl::vm::ln function. - * - * @tparam T Type of input vector `a` and of result vector `y`. - */ -template -struct LnOutputType -{ - using value_type = typename std::disjunction< - dpctl_td_ns::TypeMapResultEntry>, - dpctl_td_ns::TypeMapResultEntry>, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::DefaultResultEntry>::result_type; -}; - -/** - * @brief A factory to define pairs of supported types for which - * MKL VM library provides support in oneapi::mkl::vm::log10 function. - * - * @tparam T Type of input vector `a` and of result vector `y`. - */ -template -struct Log10OutputType -{ - using value_type = typename std::disjunction< - dpctl_td_ns::TypeMapResultEntry>, - dpctl_td_ns::TypeMapResultEntry>, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::DefaultResultEntry>::result_type; -}; - -/** - * @brief A factory to define pairs of supported types for which - * MKL VM library provides support in oneapi::mkl::vm::log1p function. - * - * @tparam T Type of input vector `a` and of result vector `y`. - */ -template -struct Log1pOutputType -{ - using value_type = typename std::disjunction< - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::DefaultResultEntry>::result_type; -}; - -/** - * @brief A factory to define pairs of supported types for which - * MKL VM library provides support in oneapi::mkl::vm::log2 function. - * - * @tparam T Type of input vector `a` and of result vector `y`. - */ -template -struct Log2OutputType -{ - using value_type = typename std::disjunction< - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::DefaultResultEntry>::result_type; -}; - /** * @brief A factory to define pairs of supported types for which * MKL VM library provides support in oneapi::mkl::vm::mul function. 
diff --git a/dpnp/backend/extensions/vm/vm_py.cpp b/dpnp/backend/extensions/vm/vm_py.cpp index 6ca46c6f91ff..6a83fb6f93a2 100644 --- a/dpnp/backend/extensions/vm/vm_py.cpp +++ b/dpnp/backend/extensions/vm/vm_py.cpp @@ -75,10 +75,6 @@ namespace vm_ns = dpnp::extensions::vm; using vm_ext::binary_impl_fn_ptr_t; using vm_ext::unary_impl_fn_ptr_t; -static unary_impl_fn_ptr_t ln_dispatch_vector[dpctl_td_ns::num_types]; -static unary_impl_fn_ptr_t log10_dispatch_vector[dpctl_td_ns::num_types]; -static unary_impl_fn_ptr_t log1p_dispatch_vector[dpctl_td_ns::num_types]; -static unary_impl_fn_ptr_t log2_dispatch_vector[dpctl_td_ns::num_types]; static binary_impl_fn_ptr_t mul_dispatch_vector[dpctl_td_ns::num_types]; static binary_impl_fn_ptr_t pow_dispatch_vector[dpctl_td_ns::num_types]; static unary_impl_fn_ptr_t round_dispatch_vector[dpctl_td_ns::num_types]; @@ -116,118 +112,10 @@ PYBIND11_MODULE(_vm_impl, m) vm_ns::init_expm1(m); vm_ns::init_floor(m); vm_ns::init_hypot(m); - - // UnaryUfunc: ==== Ln(x) ==== - { - vm_ext::init_ufunc_dispatch_vector( - ln_dispatch_vector); - - auto ln_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { - return vm_ext::unary_ufunc(exec_q, src, dst, depends, - ln_dispatch_vector); - }; - m.def("_ln", ln_pyapi, - "Call `ln` function from OneMKL VM library to compute " - "natural logarithm of vector elements", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), - py::arg("depends") = py::list()); - - auto ln_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { - return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, - ln_dispatch_vector); - }; - m.def("_mkl_ln_to_call", ln_need_to_call_pyapi, - "Check input arguments to answer if `ln` function from " - "OneMKL VM library can be used", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); - } - - // UnaryUfunc: ==== Log10(x) ==== - { - vm_ext::init_ufunc_dispatch_vector( - log10_dispatch_vector); - - auto log10_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { - return vm_ext::unary_ufunc(exec_q, src, dst, depends, - log10_dispatch_vector); - }; - m.def("_log10", log10_pyapi, - "Call `log10` function from OneMKL VM library to compute " - "base-10 logarithm of vector elements", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), - py::arg("depends") = py::list()); - - auto log10_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { - return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, - log10_dispatch_vector); - }; - m.def("_mkl_log10_to_call", log10_need_to_call_pyapi, - "Check input arguments to answer if `log10` function from " - "OneMKL VM library can be used", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); - } - - // UnaryUfunc: ==== Log1p(x) ==== - { - vm_ext::init_ufunc_dispatch_vector( - log1p_dispatch_vector); - - auto log1p_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { - return vm_ext::unary_ufunc(exec_q, src, dst, depends, - log1p_dispatch_vector); - }; - m.def("_log1p", log1p_pyapi, - "Call `log1p` function from OneMKL VM library to compute " - "natural logarithm of 1 plus vector elements", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), - py::arg("depends") = py::list()); - - auto log1p_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { - return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, - log1p_dispatch_vector); - }; - m.def("_mkl_log1p_to_call", log1p_need_to_call_pyapi, 
- "Check input arguments to answer if `log1p` function from " - "OneMKL VM library can be used", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); - } - - // UnaryUfunc: ==== Log2(x) ==== - { - vm_ext::init_ufunc_dispatch_vector( - log2_dispatch_vector); - - auto log2_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { - return vm_ext::unary_ufunc(exec_q, src, dst, depends, - log2_dispatch_vector); - }; - m.def("_log2", log2_pyapi, - "Call `log2` function from OneMKL VM library to compute " - "base-2 logarithm of vector elements", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), - py::arg("depends") = py::list()); - - auto log2_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { - return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, - log2_dispatch_vector); - }; - m.def("_mkl_log2_to_call", log2_need_to_call_pyapi, - "Check input arguments to answer if `log2` function from " - "OneMKL VM library can be used", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); - } + vm_ns::init_ln(m); + vm_ns::init_log10(m); + vm_ns::init_log1p(m); + vm_ns::init_log2(m); // BinaryUfunc: ==== Mul(x1, x2) ==== { From 3cd6f2d338d2890c196ed9f262b733f2a8f9077b Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 7 Jun 2024 17:15:10 +0200 Subject: [PATCH 17/35] Separated implementation of mul, pow, rint, sin and sinh functions --- dpnp/backend/extensions/vm/CMakeLists.txt | 5 + dpnp/backend/extensions/vm/mul.cpp | 171 ++++++++++++++++++ dpnp/backend/extensions/vm/mul.hpp | 57 +----- dpnp/backend/extensions/vm/pow.cpp | 171 ++++++++++++++++++ dpnp/backend/extensions/vm/pow.hpp | 57 +----- dpnp/backend/extensions/vm/rint.cpp | 137 ++++++++++++++ .../extensions/vm/{round.hpp => rint.hpp} | 54 +----- dpnp/backend/extensions/vm/sin.cpp | 139 ++++++++++++++ dpnp/backend/extensions/vm/sin.hpp | 54 +----- dpnp/backend/extensions/vm/sinh.cpp | 139 ++++++++++++++ dpnp/backend/extensions/vm/sinh.hpp | 54 +----- dpnp/backend/extensions/vm/types_matrix.hpp | 99 ---------- dpnp/backend/extensions/vm/vm_py.cpp | 156 +--------------- 13 files changed, 793 insertions(+), 500 deletions(-) create mode 100644 dpnp/backend/extensions/vm/mul.cpp create mode 100644 dpnp/backend/extensions/vm/pow.cpp create mode 100644 dpnp/backend/extensions/vm/rint.cpp rename dpnp/backend/extensions/vm/{round.hpp => rint.hpp} (53%) create mode 100644 dpnp/backend/extensions/vm/sin.cpp create mode 100644 dpnp/backend/extensions/vm/sinh.cpp diff --git a/dpnp/backend/extensions/vm/CMakeLists.txt b/dpnp/backend/extensions/vm/CMakeLists.txt index 37415c6c57d7..95ea7594f355 100644 --- a/dpnp/backend/extensions/vm/CMakeLists.txt +++ b/dpnp/backend/extensions/vm/CMakeLists.txt @@ -48,6 +48,11 @@ set(_elementwise_sources ${CMAKE_CURRENT_SOURCE_DIR}/log10.cpp ${CMAKE_CURRENT_SOURCE_DIR}/log1p.cpp ${CMAKE_CURRENT_SOURCE_DIR}/log2.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/mul.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/pow.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/rint.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/sin.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/sinh.cpp ) set(_module_src diff --git a/dpnp/backend/extensions/vm/mul.cpp b/dpnp/backend/extensions/vm/mul.cpp new file mode 100644 index 000000000000..9750f96bfad0 --- /dev/null +++ b/dpnp/backend/extensions/vm/mul.cpp @@ -0,0 +1,171 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include +#include + +#include "dpctl4pybind11.hpp" + +#include "common.hpp" +#include "mul.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::vm +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; +namespace vm_ext = dpnp::backend::ext::vm; + +namespace impl +{ +// OneMKL namespace with VM functions +namespace mkl_vm = oneapi::mkl::vm; + +/** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::mul function. + * + * @tparam T Type of input vectors `a` and `b` and of result vector `y`. 
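+ *
+ * Note: only the floating-point and complex floating-point pairs spelled out
+ * below map to a OneMKL call; any other combination resolves to the default
+ * (void) entry, so `_mkl_mul_to_call` reports that OneMKL cannot be used and
+ * dpnp falls back to its dpctl-based implementation for such inputs.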
+ */ +template +struct OutputType +{ + using value_type = typename std::disjunction< + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + std::complex>, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + std::complex>, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +static sycl::event mul_contig_impl(sycl::queue &exec_q, + std::size_t in_n, + const char *in_a, + ssize_t a_offset, + const char *in_b, + ssize_t b_offset, + char *out_y, + ssize_t out_offset, + const std::vector &depends) +{ + tu_ns::validate_type_for_device(exec_q); + tu_ns::validate_type_for_device(exec_q); + + if ((a_offset != 0) || (b_offset != 0) || (out_offset != 0)) { + throw std::runtime_error("Arrays offsets have to be equals to 0"); + } + + std::int64_t n = static_cast(in_n); + const T1 *a = reinterpret_cast(in_a); + const T2 *b = reinterpret_cast(in_b); + + using resTy = typename OutputType::value_type; + resTy *y = reinterpret_cast(out_y); + + return mkl_vm::mul(exec_q, + n, // number of elements to be calculated + a, // pointer `a` containing 1st input vector of size n + b, // pointer `b` containing 2nd input vector of size n + y, // pointer `y` to the output vector of size n + depends); +} + +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +static int output_typeid_vector[td_ns::num_types][td_ns::num_types]; +static binary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types] + [td_ns::num_types]; + +MACRO_POPULATE_DISPATCH_TABLES(mul); +} // namespace impl + +void init_mul(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + impl::populate_dispatch_tables(); + using impl::contig_dispatch_vector; + using impl::output_typeid_vector; + + auto mul_pyapi = [&](sycl::queue exec_q, arrayT src1, arrayT src2, + arrayT dst, const event_vecT &depends = {}) { + return py_int::py_binary_ufunc( + src1, src2, dst, exec_q, depends, output_typeid_vector, + contig_dispatch_vector, + // no support of strided implementation in OneMKL + td_ns::NullPtrTable{}, + // no support of C-contig row with broadcasting in OneMKL + td_ns::NullPtrTable< + impl:: + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + td_ns::NullPtrTable< + impl:: + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_mul", mul_pyapi, + "Call `mul` function from OneMKL VM library to performs element " + "by element multiplication of vector `src1` by vector `src2` " + "to resulting vector `dst`", + py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("depends") = py::list()); + + auto mul_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src1, + arrayT src2, arrayT dst) { + return vm_ext::need_to_call_binary_ufunc(exec_q, src1, src2, dst, + output_typeid_vector, + contig_dispatch_vector); + }; + m.def("_mkl_mul_to_call", mul_need_to_call_pyapi, + "Check input arguments to answer if `mul` function from " + "OneMKL VM library can be used", + py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), + py::arg("dst")); +} +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/mul.hpp b/dpnp/backend/extensions/vm/mul.hpp index 39ea8eec20ab..4dd138aea528 100644 --- a/dpnp/backend/extensions/vm/mul.hpp +++ b/dpnp/backend/extensions/vm/mul.hpp @@ 
-25,58 +25,11 @@ #pragma once -#include +#include -#include "common.hpp" -#include "types_matrix.hpp" +namespace py = pybind11; -namespace dpnp +namespace dpnp::extensions::vm { -namespace backend -{ -namespace ext -{ -namespace vm -{ -template -sycl::event mul_contig_impl(sycl::queue exec_q, - const std::int64_t n, - const char *in_a, - const char *in_b, - char *out_y, - const std::vector &depends) -{ - type_utils::validate_type_for_device(exec_q); - - const T *a = reinterpret_cast(in_a); - const T *b = reinterpret_cast(in_b); - using resTy = typename types::MulOutputType::value_type; - resTy *y = reinterpret_cast(out_y); - - return mkl_vm::mul(exec_q, - n, // number of elements to be calculated - a, // pointer `a` containing 1st input vector of size n - b, // pointer `b` containing 2nd input vector of size n - y, // pointer `y` to the output vector of size n - depends); -} - -template -struct MulContigFactory -{ - fnT get() - { - if constexpr (std::is_same_v< - typename types::MulOutputType::value_type, void>) - { - return nullptr; - } - else { - return mul_contig_impl; - } - } -}; -} // namespace vm -} // namespace ext -} // namespace backend -} // namespace dpnp +void init_mul(py::module_ m); +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/pow.cpp b/dpnp/backend/extensions/vm/pow.cpp new file mode 100644 index 000000000000..4ce45a77d239 --- /dev/null +++ b/dpnp/backend/extensions/vm/pow.cpp @@ -0,0 +1,171 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#include +#include + +#include "dpctl4pybind11.hpp" + +#include "common.hpp" +#include "pow.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::vm +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; +namespace vm_ext = dpnp::backend::ext::vm; + +namespace impl +{ +// OneMKL namespace with VM functions +namespace mkl_vm = oneapi::mkl::vm; + +/** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::pow function. + * + * @tparam T Type of input vectors `a` and `b` and of result vector `y`. + */ +template +struct OutputType +{ + using value_type = typename std::disjunction< + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + std::complex>, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + std::complex>, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +static sycl::event pow_contig_impl(sycl::queue &exec_q, + std::size_t in_n, + const char *in_a, + ssize_t a_offset, + const char *in_b, + ssize_t b_offset, + char *out_y, + ssize_t out_offset, + const std::vector &depends) +{ + tu_ns::validate_type_for_device(exec_q); + tu_ns::validate_type_for_device(exec_q); + + if ((a_offset != 0) || (b_offset != 0) || (out_offset != 0)) { + throw std::runtime_error("Arrays offsets have to be equals to 0"); + } + + std::int64_t n = static_cast(in_n); + const T1 *a = reinterpret_cast(in_a); + const T2 *b = reinterpret_cast(in_b); + + using resTy = typename OutputType::value_type; + resTy *y = reinterpret_cast(out_y); + + return mkl_vm::pow(exec_q, + n, // number of elements to be calculated + a, // pointer `a` containing 1st input vector of size n + b, // pointer `b` containing 2nd input vector of size n + y, // pointer `y` to the output vector of size n + depends); +} + +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +static int output_typeid_vector[td_ns::num_types][td_ns::num_types]; +static binary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types] + [td_ns::num_types]; + +MACRO_POPULATE_DISPATCH_TABLES(pow); +} // namespace impl + +void init_pow(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + impl::populate_dispatch_tables(); + using impl::contig_dispatch_vector; + using impl::output_typeid_vector; + + auto pow_pyapi = [&](sycl::queue exec_q, arrayT src1, arrayT src2, + arrayT dst, const event_vecT &depends = {}) { + return py_int::py_binary_ufunc( + src1, src2, dst, exec_q, depends, output_typeid_vector, + contig_dispatch_vector, + // no support of strided implementation in OneMKL + td_ns::NullPtrTable{}, + // no 
support of C-contig row with broadcasting in OneMKL + td_ns::NullPtrTable< + impl:: + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + td_ns::NullPtrTable< + impl:: + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_pow", pow_pyapi, + "Call `pow` function from OneMKL VM library to performs element " + "by element exponentiation of vector `src1` raised to the power " + "of vector `src2` to resulting vector `dst`", + py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("depends") = py::list()); + + auto pow_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src1, + arrayT src2, arrayT dst) { + return vm_ext::need_to_call_binary_ufunc(exec_q, src1, src2, dst, + output_typeid_vector, + contig_dispatch_vector); + }; + m.def("_mkl_pow_to_call", pow_need_to_call_pyapi, + "Check input arguments to answer if `pow` function from " + "OneMKL VM library can be used", + py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), + py::arg("dst")); +} +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/pow.hpp b/dpnp/backend/extensions/vm/pow.hpp index f5e946914bf3..ef6770d10651 100644 --- a/dpnp/backend/extensions/vm/pow.hpp +++ b/dpnp/backend/extensions/vm/pow.hpp @@ -25,58 +25,11 @@ #pragma once -#include +#include -#include "common.hpp" -#include "types_matrix.hpp" +namespace py = pybind11; -namespace dpnp +namespace dpnp::extensions::vm { -namespace backend -{ -namespace ext -{ -namespace vm -{ -template -sycl::event pow_contig_impl(sycl::queue exec_q, - const std::int64_t n, - const char *in_a, - const char *in_b, - char *out_y, - const std::vector &depends) -{ - type_utils::validate_type_for_device(exec_q); - - const T *a = reinterpret_cast(in_a); - const T *b = reinterpret_cast(in_b); - using resTy = typename types::PowOutputType::value_type; - resTy *y = reinterpret_cast(out_y); - - return mkl_vm::pow(exec_q, - n, // number of elements to be calculated - a, // pointer `a` containing 1st input vector of size n - b, // pointer `b` containing 2nd input vector of size n - y, // pointer `y` to the output vector of size n - depends); -} - -template -struct PowContigFactory -{ - fnT get() - { - if constexpr (std::is_same_v< - typename types::PowOutputType::value_type, void>) - { - return nullptr; - } - else { - return pow_contig_impl; - } - } -}; -} // namespace vm -} // namespace ext -} // namespace backend -} // namespace dpnp +void init_pow(py::module_ m); +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/rint.cpp b/dpnp/backend/extensions/vm/rint.cpp new file mode 100644 index 000000000000..6a65352bc01f --- /dev/null +++ b/dpnp/backend/extensions/vm/rint.cpp @@ -0,0 +1,137 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include +#include + +#include "dpctl4pybind11.hpp" + +#include "common.hpp" +#include "rint.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::vm +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; +namespace vm_ext = dpnp::backend::ext::vm; + +namespace impl +{ +// OneMKL namespace with VM functions +namespace mkl_vm = oneapi::mkl::vm; + +/** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::rint function. + * + * @tparam T Type of input vector `a` and of result vector `y`. 
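+ *
+ * Note: only double- and single-precision real types are listed below, so
+ * for integer and complex inputs `_mkl_round_to_call` reports that OneMKL
+ * cannot be used and the dpctl-based implementation is used instead.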
+ */ +template +struct OutputType +{ + using value_type = + typename std::disjunction, + td_ns::TypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +static sycl::event rint_contig_impl(sycl::queue &exec_q, + std::size_t in_n, + const char *in_a, + char *out_y, + const std::vector &depends) +{ + tu_ns::validate_type_for_device(exec_q); + + std::int64_t n = static_cast(in_n); + const T *a = reinterpret_cast(in_a); + + using resTy = typename OutputType::value_type; + resTy *y = reinterpret_cast(out_y); + + return mkl_vm::rint(exec_q, + n, // number of elements to be calculated + a, // pointer `a` containing input vector of size n + y, // pointer `y` to the output vector of size n + depends); +} + +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +static int output_typeid_vector[td_ns::num_types]; +static unary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types]; + +MACRO_POPULATE_DISPATCH_VECTORS(rint); +} // namespace impl + +void init_rint(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + impl::populate_dispatch_vectors(); + using impl::contig_dispatch_vector; + using impl::output_typeid_vector; + + auto rint_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, + const event_vecT &depends = {}) { + return py_int::py_unary_ufunc( + src, dst, exec_q, depends, output_typeid_vector, + contig_dispatch_vector, + // no support of strided implementation in OneMKL + td_ns::NullPtrVector{}); + }; + m.def("_round", rint_pyapi, + "Call `rint` function from OneMKL VM library to compute " + "the rounded value of vector elements", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), + py::arg("depends") = py::list()); + + auto rint_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, + arrayT dst) { + return vm_ext::need_to_call_unary_ufunc( + exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); + }; + m.def("_mkl_round_to_call", rint_need_to_call_pyapi, + "Check input arguments to answer if `rint` function from " + "OneMKL VM library can be used", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); +} +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/round.hpp b/dpnp/backend/extensions/vm/rint.hpp similarity index 53% rename from dpnp/backend/extensions/vm/round.hpp rename to dpnp/backend/extensions/vm/rint.hpp index a2ae3b3bc528..ce493368788f 100644 --- a/dpnp/backend/extensions/vm/round.hpp +++ b/dpnp/backend/extensions/vm/rint.hpp @@ -25,55 +25,11 @@ #pragma once -#include +#include -#include "common.hpp" -#include "types_matrix.hpp" +namespace py = pybind11; -namespace dpnp +namespace dpnp::extensions::vm { -namespace backend -{ -namespace ext -{ -namespace vm -{ -template -sycl::event round_contig_impl(sycl::queue exec_q, - const std::int64_t n, - const char *in_a, - char *out_y, - const std::vector &depends) -{ - type_utils::validate_type_for_device(exec_q); - - const T *a = reinterpret_cast(in_a); - using resTy = typename types::RoundOutputType::value_type; - resTy *y = reinterpret_cast(out_y); - - return mkl_vm::rint(exec_q, - n, // number of elements to be calculated - a, // pointer `a` containing input vector of size n - y, // pointer `y` to the output vector of size n - depends); -} - -template -struct RoundContigFactory -{ - fnT get() - { - if constexpr (std::is_same_v< - typename types::RoundOutputType::value_type, void>) - { - return nullptr; - } - else { - return round_contig_impl; - } - } -}; -} // 
namespace vm -} // namespace ext -} // namespace backend -} // namespace dpnp +void init_rint(py::module_ m); +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/sin.cpp b/dpnp/backend/extensions/vm/sin.cpp new file mode 100644 index 000000000000..39abddb9c7f0 --- /dev/null +++ b/dpnp/backend/extensions/vm/sin.cpp @@ -0,0 +1,139 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include +#include + +#include "dpctl4pybind11.hpp" + +#include "common.hpp" +#include "sin.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::vm +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; +namespace vm_ext = dpnp::backend::ext::vm; + +namespace impl +{ +// OneMKL namespace with VM functions +namespace mkl_vm = oneapi::mkl::vm; + +/** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::sin function. + * + * @tparam T Type of input vector `a` and of result vector `y`. 
+ */ +template +struct OutputType +{ + using value_type = typename std::disjunction< + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +static sycl::event sin_contig_impl(sycl::queue &exec_q, + std::size_t in_n, + const char *in_a, + char *out_y, + const std::vector &depends) +{ + tu_ns::validate_type_for_device(exec_q); + + std::int64_t n = static_cast(in_n); + const T *a = reinterpret_cast(in_a); + + using resTy = typename OutputType::value_type; + resTy *y = reinterpret_cast(out_y); + + return mkl_vm::sin(exec_q, + n, // number of elements to be calculated + a, // pointer `a` containing input vector of size n + y, // pointer `y` to the output vector of size n + depends); +} + +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +static int output_typeid_vector[td_ns::num_types]; +static unary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types]; + +MACRO_POPULATE_DISPATCH_VECTORS(sin); +} // namespace impl + +void init_sin(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + impl::populate_dispatch_vectors(); + using impl::contig_dispatch_vector; + using impl::output_typeid_vector; + + auto sin_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, + const event_vecT &depends = {}) { + return py_int::py_unary_ufunc( + src, dst, exec_q, depends, output_typeid_vector, + contig_dispatch_vector, + // no support of strided implementation in OneMKL + td_ns::NullPtrVector{}); + }; + m.def("_sin", sin_pyapi, + "Call `sin` function from OneMKL VM library to compute " + "the sine of vector elements", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), + py::arg("depends") = py::list()); + + auto sin_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, + arrayT dst) { + return vm_ext::need_to_call_unary_ufunc( + exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); + }; + m.def("_mkl_sin_to_call", sin_need_to_call_pyapi, + "Check input arguments to answer if `sin` function from " + "OneMKL VM library can be used", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); +} +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/sin.hpp b/dpnp/backend/extensions/vm/sin.hpp index 0af14c68c876..dcda488e728f 100644 --- a/dpnp/backend/extensions/vm/sin.hpp +++ b/dpnp/backend/extensions/vm/sin.hpp @@ -25,55 +25,11 @@ #pragma once -#include +#include -#include "common.hpp" -#include "types_matrix.hpp" +namespace py = pybind11; -namespace dpnp +namespace dpnp::extensions::vm { -namespace backend -{ -namespace ext -{ -namespace vm -{ -template -sycl::event sin_contig_impl(sycl::queue exec_q, - const std::int64_t n, - const char *in_a, - char *out_y, - const std::vector &depends) -{ - type_utils::validate_type_for_device(exec_q); - - const T *a = reinterpret_cast(in_a); - using resTy = typename types::SinOutputType::value_type; - resTy *y = reinterpret_cast(out_y); - - return mkl_vm::sin(exec_q, - n, // number of elements to be calculated - a, // pointer `a` containing input vector of size n - y, // pointer `y` to the output vector of size n - depends); -} - -template -struct SinContigFactory -{ - fnT get() - { - if constexpr (std::is_same_v< - typename types::SinOutputType::value_type, void>) - { - return nullptr; - } - else { - return sin_contig_impl; - } - } -}; -} // namespace vm -} // namespace ext -} // namespace backend -} // namespace 
dpnp +void init_sin(py::module_ m); +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/sinh.cpp b/dpnp/backend/extensions/vm/sinh.cpp new file mode 100644 index 000000000000..367c54e9b8cd --- /dev/null +++ b/dpnp/backend/extensions/vm/sinh.cpp @@ -0,0 +1,139 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include +#include + +#include "dpctl4pybind11.hpp" + +#include "common.hpp" +#include "sinh.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::vm +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; +namespace vm_ext = dpnp::backend::ext::vm; + +namespace impl +{ +// OneMKL namespace with VM functions +namespace mkl_vm = oneapi::mkl::vm; + +/** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::sinh function. + * + * @tparam T Type of input vector `a` and of result vector `y`. 
+ */
+template <typename T>
+struct OutputType
+{
+    using value_type = typename std::disjunction<
+        td_ns::TypeMapResultEntry<T, std::complex<double>>,
+        td_ns::TypeMapResultEntry<T, std::complex<float>>,
+        td_ns::TypeMapResultEntry<T, double>,
+        td_ns::TypeMapResultEntry<T, float>,
+        td_ns::DefaultResultEntry<void>>::result_type;
+};
+
+template <typename T>
+static sycl::event sinh_contig_impl(sycl::queue &exec_q,
+                                    std::size_t in_n,
+                                    const char *in_a,
+                                    char *out_y,
+                                    const std::vector<sycl::event> &depends)
+{
+    tu_ns::validate_type_for_device<T>(exec_q);
+
+    std::int64_t n = static_cast<std::int64_t>(in_n);
+    const T *a = reinterpret_cast<const T *>(in_a);
+
+    using resTy = typename OutputType<T>::value_type;
+    resTy *y = reinterpret_cast<resTy *>(out_y);
+
+    return mkl_vm::sinh(exec_q,
+                        n, // number of elements to be calculated
+                        a, // pointer `a` containing input vector of size n
+                        y, // pointer `y` to the output vector of size n
+                        depends);
+}
+
+using ew_cmn_ns::unary_contig_impl_fn_ptr_t;
+using ew_cmn_ns::unary_strided_impl_fn_ptr_t;
+
+static int output_typeid_vector[td_ns::num_types];
+static unary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types];
+
+MACRO_POPULATE_DISPATCH_VECTORS(sinh);
+} // namespace impl
+
+void init_sinh(py::module_ m)
+{
+    using arrayT = dpctl::tensor::usm_ndarray;
+    using event_vecT = std::vector<sycl::event>;
+
+    impl::populate_dispatch_vectors();
+    using impl::contig_dispatch_vector;
+    using impl::output_typeid_vector;
+
+    auto sinh_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst,
+                          const event_vecT &depends = {}) {
+        return py_int::py_unary_ufunc(
+            src, dst, exec_q, depends, output_typeid_vector,
+            contig_dispatch_vector,
+            // no support of strided implementation in OneMKL
+            td_ns::NullPtrVector<impl::unary_strided_impl_fn_ptr_t>{});
+    };
+    m.def("_sinh", sinh_pyapi,
+          "Call `sinh` function from OneMKL VM library to compute "
+          "the hyperbolic sine of vector elements",
+          py::arg("sycl_queue"), py::arg("src"), py::arg("dst"),
+          py::arg("depends") = py::list());
+
+    auto sinh_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src,
+                                       arrayT dst) {
+        return vm_ext::need_to_call_unary_ufunc(
+            exec_q, src, dst, output_typeid_vector, contig_dispatch_vector);
+    };
+    m.def("_mkl_sinh_to_call", sinh_need_to_call_pyapi,
+          "Check input arguments to answer if `sinh` function from "
+          "OneMKL VM library can be used",
+          py::arg("sycl_queue"), py::arg("src"), py::arg("dst"));
+}
+} // namespace dpnp::extensions::vm
diff --git a/dpnp/backend/extensions/vm/sinh.hpp b/dpnp/backend/extensions/vm/sinh.hpp
index 6fe53423c535..92f1e740a627 100644
--- a/dpnp/backend/extensions/vm/sinh.hpp
+++ b/dpnp/backend/extensions/vm/sinh.hpp
@@ -25,55 +25,11 @@
 
 #pragma once
 
-#include <CL/sycl.hpp>
+#include <pybind11/pybind11.h>
 
-#include "common.hpp"
-#include "types_matrix.hpp"
+namespace py = pybind11;
 
-namespace dpnp
+namespace dpnp::extensions::vm
 {
-namespace backend
-{
-namespace ext
-{
-namespace vm
-{
-template <typename T>
-sycl::event sinh_contig_impl(sycl::queue exec_q,
-                             const std::int64_t n,
-                             const char *in_a,
-                             char *out_y,
-                             const std::vector<sycl::event> &depends)
-{
-    type_utils::validate_type_for_device<T>(exec_q);
-
-    const T *a = reinterpret_cast<const T *>(in_a);
-    using resTy = typename types::SinhOutputType<T>::value_type;
-    resTy *y = reinterpret_cast<resTy *>(out_y);
-
-    return mkl_vm::sinh(exec_q,
-                        n, // number of elements to be calculated
-                        a, // pointer `a` containing input vector of size n
-                        y, // pointer `y` to the output vector of size n
-                        depends);
-}
-
-template <typename fnT, typename T>
-struct SinhContigFactory
-{
-    fnT get()
-    {
-        if constexpr (std::is_same_v<
-                          typename types::SinhOutputType<T>::value_type, void>)
-        {
-            return nullptr;
-        }
-        else {
-            return sinh_contig_impl<T>;
-        }
-    }
-};
-} // namespace vm
-} // namespace ext
-} //
namespace backend -} // namespace dpnp +void init_sinh(py::module_ m); +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/types_matrix.hpp b/dpnp/backend/extensions/vm/types_matrix.hpp index ff537ef4e18f..6336b14f7158 100644 --- a/dpnp/backend/extensions/vm/types_matrix.hpp +++ b/dpnp/backend/extensions/vm/types_matrix.hpp @@ -43,105 +43,6 @@ namespace vm { namespace types { -/** - * @brief A factory to define pairs of supported types for which - * MKL VM library provides support in oneapi::mkl::vm::mul function. - * - * @tparam T Type of input vectors `a` and `b` and of result vector `y`. - */ -template -struct MulOutputType -{ - using value_type = typename std::disjunction< - dpctl_td_ns::BinaryTypeMapResultEntry, - T, - std::complex, - std::complex>, - dpctl_td_ns::BinaryTypeMapResultEntry, - T, - std::complex, - std::complex>, - dpctl_td_ns::BinaryTypeMapResultEntry, - dpctl_td_ns::BinaryTypeMapResultEntry, - dpctl_td_ns::DefaultResultEntry>::result_type; -}; - -/** - * @brief A factory to define pairs of supported types for which - * MKL VM library provides support in oneapi::mkl::vm::pow function. - * - * @tparam T Type of input vectors `a` and `b` and of result vector `y`. - */ -template -struct PowOutputType -{ - using value_type = typename std::disjunction< - dpctl_td_ns::BinaryTypeMapResultEntry, - T, - std::complex, - std::complex>, - dpctl_td_ns::BinaryTypeMapResultEntry, - T, - std::complex, - std::complex>, - dpctl_td_ns::BinaryTypeMapResultEntry, - dpctl_td_ns::BinaryTypeMapResultEntry, - dpctl_td_ns::DefaultResultEntry>::result_type; -}; - -/** - * @brief A factory to define pairs of supported types for which - * MKL VM library provides support in oneapi::mkl::vm::rint function. - * - * @tparam T Type of input vector `a` and of result vector `y`. - */ -template -struct RoundOutputType -{ - using value_type = typename std::disjunction< - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::DefaultResultEntry>::result_type; -}; - -/** - * @brief A factory to define pairs of supported types for which - * MKL VM library provides support in oneapi::mkl::vm::sin function. - * - * @tparam T Type of input vector `a` and of result vector `y`. - */ -template -struct SinOutputType -{ - using value_type = typename std::disjunction< - dpctl_td_ns::TypeMapResultEntry>, - dpctl_td_ns::TypeMapResultEntry>, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::DefaultResultEntry>::result_type; -}; - -/** - * @brief A factory to define pairs of supported types for which - * MKL VM library provides support in oneapi::mkl::vm::sinh function. - * - * @tparam T Type of input vector `a` and of result vector `y`. - */ -template -struct SinhOutputType -{ - using value_type = typename std::disjunction< - dpctl_td_ns::TypeMapResultEntry>, - dpctl_td_ns::TypeMapResultEntry>, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::DefaultResultEntry>::result_type; -}; - /** * @brief A factory to define pairs of supported types for which * MKL VM library provides support in oneapi::mkl::vm::sqr function. 
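Each of the new sources above keeps its OneMKL type map and contiguous kernel next to a pair of static dispatch structures and fills them through MACRO_POPULATE_DISPATCH_VECTORS / MACRO_POPULATE_DISPATCH_TABLES from common.hpp, which is what lets this series drop the shared entries in types_matrix.hpp. The macro bodies are not shown here; the following is only a rough sketch of what the unary variant could plausibly expand to inside the `impl` namespace of rint.cpp, assuming dpctl's td_ns::DispatchVectorBuilder and td_ns::GetTypeid helpers, with RintContigFactory and RintTypeMapFactory used purely as illustrative names:

// Sketch only (assumption): one possible expansion of
// MACRO_POPULATE_DISPATCH_VECTORS(rint); RintContigFactory and
// RintTypeMapFactory are illustrative names, not taken from common.hpp.
template <typename fnT, typename T>
struct RintContigFactory
{
    fnT get()
    {
        if constexpr (std::is_same_v<typename OutputType<T>::value_type, void>)
        {
            // no OneMKL support for this type: keep a null entry so the
            // Python-level check falls back to the dpctl implementation
            return nullptr;
        }
        else {
            return rint_contig_impl<T>;
        }
    }
};

template <typename fnT, typename T>
struct RintTypeMapFactory
{
    // map an input type id to the type id of the result OneMKL produces
    std::enable_if_t<std::is_same_v<fnT, int>, int> get()
    {
        using rT = typename OutputType<T>::value_type;
        return td_ns::GetTypeid<rT>{}.get();
    }
};

static void populate_dispatch_vectors(void)
{
    td_ns::DispatchVectorBuilder<unary_contig_impl_fn_ptr_t, RintContigFactory,
                                 td_ns::num_types>
        contig_builder;
    contig_builder.populate_dispatch_vector(contig_dispatch_vector);

    td_ns::DispatchVectorBuilder<int, RintTypeMapFactory, td_ns::num_types>
        typeid_builder;
    typeid_builder.populate_dispatch_vector(output_typeid_vector);
}

The binary MACRO_POPULATE_DISPATCH_TABLES case would presumably do the same with td_ns::DispatchTableBuilder over the two-dimensional contig_dispatch_vector and output_typeid_vector tables.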
diff --git a/dpnp/backend/extensions/vm/vm_py.cpp b/dpnp/backend/extensions/vm/vm_py.cpp index 6a83fb6f93a2..f1ce3d6ea86b 100644 --- a/dpnp/backend/extensions/vm/vm_py.cpp +++ b/dpnp/backend/extensions/vm/vm_py.cpp @@ -57,7 +57,7 @@ #include "log2.hpp" #include "mul.hpp" #include "pow.hpp" -#include "round.hpp" +#include "rint.hpp" #include "sin.hpp" #include "sinh.hpp" #include "sqr.hpp" @@ -75,11 +75,6 @@ namespace vm_ns = dpnp::extensions::vm; using vm_ext::binary_impl_fn_ptr_t; using vm_ext::unary_impl_fn_ptr_t; -static binary_impl_fn_ptr_t mul_dispatch_vector[dpctl_td_ns::num_types]; -static binary_impl_fn_ptr_t pow_dispatch_vector[dpctl_td_ns::num_types]; -static unary_impl_fn_ptr_t round_dispatch_vector[dpctl_td_ns::num_types]; -static unary_impl_fn_ptr_t sin_dispatch_vector[dpctl_td_ns::num_types]; -static unary_impl_fn_ptr_t sinh_dispatch_vector[dpctl_td_ns::num_types]; static unary_impl_fn_ptr_t sqr_dispatch_vector[dpctl_td_ns::num_types]; static unary_impl_fn_ptr_t sqrt_dispatch_vector[dpctl_td_ns::num_types]; static binary_impl_fn_ptr_t sub_dispatch_vector[dpctl_td_ns::num_types]; @@ -116,150 +111,11 @@ PYBIND11_MODULE(_vm_impl, m) vm_ns::init_log10(m); vm_ns::init_log1p(m); vm_ns::init_log2(m); - - // BinaryUfunc: ==== Mul(x1, x2) ==== - { - vm_ext::init_ufunc_dispatch_vector( - mul_dispatch_vector); - - auto mul_pyapi = [&](sycl::queue exec_q, arrayT src1, arrayT src2, - arrayT dst, const event_vecT &depends = {}) { - return vm_ext::binary_ufunc(exec_q, src1, src2, dst, depends, - mul_dispatch_vector); - }; - m.def("_mul", mul_pyapi, - "Call `mul` function from OneMKL VM library to performs element " - "by element multiplication of vector `src1` by vector `src2` " - "to resulting vector `dst`", - py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("depends") = py::list()); - - auto mul_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src1, - arrayT src2, arrayT dst) { - return vm_ext::need_to_call_binary_ufunc(exec_q, src1, src2, dst, - mul_dispatch_vector); - }; - m.def("_mkl_mul_to_call", mul_need_to_call_pyapi, - "Check input arguments to answer if `mul` function from " - "OneMKL VM library can be used", - py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), - py::arg("dst")); - } - - // BinaryUfunc: ==== Pow(x1, x2) ==== - { - vm_ext::init_ufunc_dispatch_vector( - pow_dispatch_vector); - - auto pow_pyapi = [&](sycl::queue exec_q, arrayT src1, arrayT src2, - arrayT dst, const event_vecT &depends = {}) { - return vm_ext::binary_ufunc(exec_q, src1, src2, dst, depends, - pow_dispatch_vector); - }; - m.def("_pow", pow_pyapi, - "Call `pow` function from OneMKL VM library to performs element " - "by element exponentiation of vector `src1` raised to the power " - "of vector `src2` to resulting vector `dst`", - py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("depends") = py::list()); - - auto pow_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src1, - arrayT src2, arrayT dst) { - return vm_ext::need_to_call_binary_ufunc(exec_q, src1, src2, dst, - pow_dispatch_vector); - }; - m.def("_mkl_pow_to_call", pow_need_to_call_pyapi, - "Check input arguments to answer if `pow` function from " - "OneMKL VM library can be used", - py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), - py::arg("dst")); - } - - // UnaryUfunc: ==== Round(x) ==== - { - vm_ext::init_ufunc_dispatch_vector( - round_dispatch_vector); - - auto round_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) 
{ - return vm_ext::unary_ufunc(exec_q, src, dst, depends, - round_dispatch_vector); - }; - m.def("_round", round_pyapi, - "Call `rint` function from OneMKL VM library to compute " - "the rounded value of vector elements", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), - py::arg("depends") = py::list()); - - auto round_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { - return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, - round_dispatch_vector); - }; - m.def("_mkl_round_to_call", round_need_to_call_pyapi, - "Check input arguments to answer if `rint` function from " - "OneMKL VM library can be used", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); - } - - // UnaryUfunc: ==== Sin(x) ==== - { - vm_ext::init_ufunc_dispatch_vector( - sin_dispatch_vector); - - auto sin_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { - return vm_ext::unary_ufunc(exec_q, src, dst, depends, - sin_dispatch_vector); - }; - m.def("_sin", sin_pyapi, - "Call `sin` function from OneMKL VM library to compute " - "sine of vector elements", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), - py::arg("depends") = py::list()); - - auto sin_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { - return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, - sin_dispatch_vector); - }; - m.def("_mkl_sin_to_call", sin_need_to_call_pyapi, - "Check input arguments to answer if `sin` function from " - "OneMKL VM library can be used", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); - } - - // UnaryUfunc: ==== Sinh(x) ==== - { - vm_ext::init_ufunc_dispatch_vector( - sinh_dispatch_vector); - - auto sinh_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { - return vm_ext::unary_ufunc(exec_q, src, dst, depends, - sinh_dispatch_vector); - }; - m.def("_sinh", sinh_pyapi, - "Call `sinh` function from OneMKL VM library to compute " - "inverse cosine of vector elements", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), - py::arg("depends") = py::list()); - - auto sinh_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { - return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, - sinh_dispatch_vector); - }; - m.def("_mkl_sinh_to_call", sinh_need_to_call_pyapi, - "Check input arguments to answer if `sinh` function from " - "OneMKL VM library can be used", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); - } + vm_ns::init_mul(m); + vm_ns::init_pow(m); + vm_ns::init_rint(m); + vm_ns::init_sin(m); + vm_ns::init_sinh(m); // UnaryUfunc: ==== Sqr(x) ==== { From 8c8b9aad9042934f05ac2cadbade7c6f1c78ab9a Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Mon, 10 Jun 2024 19:56:46 +0200 Subject: [PATCH 18/35] Separated implementation of sqr, sqrt, sub, tan, tanh and trunc functions --- dpnp/backend/extensions/vm/CMakeLists.txt | 6 + dpnp/backend/extensions/vm/sqr.cpp | 137 ++++++++++++++ dpnp/backend/extensions/vm/sqr.hpp | 54 +----- dpnp/backend/extensions/vm/sqrt.cpp | 140 ++++++++++++++ dpnp/backend/extensions/vm/sqrt.hpp | 54 +----- dpnp/backend/extensions/vm/sub.cpp | 171 +++++++++++++++++ dpnp/backend/extensions/vm/sub.hpp | 57 +----- dpnp/backend/extensions/vm/tan.cpp | 139 ++++++++++++++ dpnp/backend/extensions/vm/tan.hpp | 54 +----- dpnp/backend/extensions/vm/tanh.cpp | 139 ++++++++++++++ dpnp/backend/extensions/vm/tanh.hpp | 54 +----- dpnp/backend/extensions/vm/trunc.cpp | 137 ++++++++++++++ dpnp/backend/extensions/vm/trunc.hpp | 54 +----- 
dpnp/backend/extensions/vm/types_matrix.hpp | 91 --------- dpnp/backend/extensions/vm/vm_py.cpp | 198 +------------------- 15 files changed, 905 insertions(+), 580 deletions(-) create mode 100644 dpnp/backend/extensions/vm/sqr.cpp create mode 100644 dpnp/backend/extensions/vm/sqrt.cpp create mode 100644 dpnp/backend/extensions/vm/sub.cpp create mode 100644 dpnp/backend/extensions/vm/tan.cpp create mode 100644 dpnp/backend/extensions/vm/tanh.cpp create mode 100644 dpnp/backend/extensions/vm/trunc.cpp diff --git a/dpnp/backend/extensions/vm/CMakeLists.txt b/dpnp/backend/extensions/vm/CMakeLists.txt index 95ea7594f355..ba1e46ea0ed8 100644 --- a/dpnp/backend/extensions/vm/CMakeLists.txt +++ b/dpnp/backend/extensions/vm/CMakeLists.txt @@ -53,6 +53,12 @@ set(_elementwise_sources ${CMAKE_CURRENT_SOURCE_DIR}/rint.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sin.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sinh.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/sqr.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/sqrt.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/sub.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/tan.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/tanh.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/trunc.cpp ) set(_module_src diff --git a/dpnp/backend/extensions/vm/sqr.cpp b/dpnp/backend/extensions/vm/sqr.cpp new file mode 100644 index 000000000000..4e763a89f897 --- /dev/null +++ b/dpnp/backend/extensions/vm/sqr.cpp @@ -0,0 +1,137 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#include +#include + +#include "dpctl4pybind11.hpp" + +#include "common.hpp" +#include "sqr.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::vm +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; +namespace vm_ext = dpnp::backend::ext::vm; + +namespace impl +{ +// OneMKL namespace with VM functions +namespace mkl_vm = oneapi::mkl::vm; + +/** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::sqr function. + * + * @tparam T Type of input vector `a` and of result vector `y`. + */ +template +struct OutputType +{ + using value_type = + typename std::disjunction, + td_ns::TypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +static sycl::event sqr_contig_impl(sycl::queue &exec_q, + std::size_t in_n, + const char *in_a, + char *out_y, + const std::vector &depends) +{ + tu_ns::validate_type_for_device(exec_q); + + std::int64_t n = static_cast(in_n); + const T *a = reinterpret_cast(in_a); + + using resTy = typename OutputType::value_type; + resTy *y = reinterpret_cast(out_y); + + return mkl_vm::sqr(exec_q, + n, // number of elements to be calculated + a, // pointer `a` containing input vector of size n + y, // pointer `y` to the output vector of size n + depends); +} + +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +static int output_typeid_vector[td_ns::num_types]; +static unary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types]; + +MACRO_POPULATE_DISPATCH_VECTORS(sqr); +} // namespace impl + +void init_sqr(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + impl::populate_dispatch_vectors(); + using impl::contig_dispatch_vector; + using impl::output_typeid_vector; + + auto sqr_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, + const event_vecT &depends = {}) { + return py_int::py_unary_ufunc( + src, dst, exec_q, depends, output_typeid_vector, + contig_dispatch_vector, + // no support of strided implementation in OneMKL + td_ns::NullPtrVector{}); + }; + m.def("_sqr", sqr_pyapi, + "Call `sqr` from OneMKL VM library to performs element by element " + "operation of squaring of vector `src` to resulting vector `dst`", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), + py::arg("depends") = py::list()); + + auto sqr_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, + arrayT dst) { + return vm_ext::need_to_call_unary_ufunc( + exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); + }; + m.def("_mkl_sqr_to_call", sqr_need_to_call_pyapi, + "Check input arguments to answer if `sqr` function from " + "OneMKL VM library can be used", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); +} +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/sqr.hpp 
b/dpnp/backend/extensions/vm/sqr.hpp index 8f1d4ac44fd5..2fe78ceead65 100644 --- a/dpnp/backend/extensions/vm/sqr.hpp +++ b/dpnp/backend/extensions/vm/sqr.hpp @@ -25,55 +25,11 @@ #pragma once -#include +#include -#include "common.hpp" -#include "types_matrix.hpp" +namespace py = pybind11; -namespace dpnp +namespace dpnp::extensions::vm { -namespace backend -{ -namespace ext -{ -namespace vm -{ -template -sycl::event sqr_contig_impl(sycl::queue exec_q, - const std::int64_t n, - const char *in_a, - char *out_y, - const std::vector &depends) -{ - type_utils::validate_type_for_device(exec_q); - - const T *a = reinterpret_cast(in_a); - using resTy = typename types::SqrOutputType::value_type; - resTy *y = reinterpret_cast(out_y); - - return mkl_vm::sqr(exec_q, - n, // number of elements to be calculated - a, // pointer `a` containing input vector of size n - y, // pointer `y` to the output vector of size n - depends); -} - -template -struct SqrContigFactory -{ - fnT get() - { - if constexpr (std::is_same_v< - typename types::SqrOutputType::value_type, void>) - { - return nullptr; - } - else { - return sqr_contig_impl; - } - } -}; -} // namespace vm -} // namespace ext -} // namespace backend -} // namespace dpnp +void init_sqr(py::module_ m); +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/sqrt.cpp b/dpnp/backend/extensions/vm/sqrt.cpp new file mode 100644 index 000000000000..c25c6304ae1e --- /dev/null +++ b/dpnp/backend/extensions/vm/sqrt.cpp @@ -0,0 +1,140 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#include +#include + +#include "dpctl4pybind11.hpp" + +#include "common.hpp" +#include "sqrt.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::vm +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; +namespace vm_ext = dpnp::backend::ext::vm; + +namespace impl +{ +// OneMKL namespace with VM functions +namespace mkl_vm = oneapi::mkl::vm; + +/** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::sqrt function. + * + * @tparam T Type of input vector `a` and of result vector `y`. + */ +template +struct OutputType +{ + using value_type = typename std::disjunction< + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +static sycl::event sqrt_contig_impl(sycl::queue &exec_q, + std::size_t in_n, + const char *in_a, + char *out_y, + const std::vector &depends) +{ + tu_ns::validate_type_for_device(exec_q); + + std::int64_t n = static_cast(in_n); + const T *a = reinterpret_cast(in_a); + + using resTy = typename OutputType::value_type; + resTy *y = reinterpret_cast(out_y); + + return mkl_vm::sqrt(exec_q, + n, // number of elements to be calculated + a, // pointer `a` containing input vector of size n + y, // pointer `y` to the output vector of size n + depends); +} + +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +static int output_typeid_vector[td_ns::num_types]; +static unary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types]; + +MACRO_POPULATE_DISPATCH_VECTORS(sqrt); +} // namespace impl + +void init_sqrt(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + impl::populate_dispatch_vectors(); + using impl::contig_dispatch_vector; + using impl::output_typeid_vector; + + auto sqrt_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, + const event_vecT &depends = {}) { + return py_int::py_unary_ufunc( + src, dst, exec_q, depends, output_typeid_vector, + contig_dispatch_vector, + // no support of strided implementation in OneMKL + td_ns::NullPtrVector{}); + }; + m.def("_sqrt", sqrt_pyapi, + "Call `sqrt` from OneMKL VM library to performs element by element " + "operation of extracting the square root " + "of vector `src` to resulting vector `dst`", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), + py::arg("depends") = py::list()); + + auto sqrt_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, + arrayT dst) { + return vm_ext::need_to_call_unary_ufunc( + exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); + }; + m.def("_mkl_sqrt_to_call", sqrt_need_to_call_pyapi, + "Check input arguments to answer if `sqrt` function from " + "OneMKL VM library can be used", + py::arg("sycl_queue"), py::arg("src"), 
py::arg("dst")); +} +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/sqrt.hpp b/dpnp/backend/extensions/vm/sqrt.hpp index e3984133628c..08d37049580d 100644 --- a/dpnp/backend/extensions/vm/sqrt.hpp +++ b/dpnp/backend/extensions/vm/sqrt.hpp @@ -25,55 +25,11 @@ #pragma once -#include +#include -#include "common.hpp" -#include "types_matrix.hpp" +namespace py = pybind11; -namespace dpnp +namespace dpnp::extensions::vm { -namespace backend -{ -namespace ext -{ -namespace vm -{ -template -sycl::event sqrt_contig_impl(sycl::queue exec_q, - const std::int64_t n, - const char *in_a, - char *out_y, - const std::vector &depends) -{ - type_utils::validate_type_for_device(exec_q); - - const T *a = reinterpret_cast(in_a); - using resTy = typename types::SqrtOutputType::value_type; - resTy *y = reinterpret_cast(out_y); - - return mkl_vm::sqrt(exec_q, - n, // number of elements to be calculated - a, // pointer `a` containing input vector of size n - y, // pointer `y` to the output vector of size n - depends); -} - -template -struct SqrtContigFactory -{ - fnT get() - { - if constexpr (std::is_same_v< - typename types::SqrtOutputType::value_type, void>) - { - return nullptr; - } - else { - return sqrt_contig_impl; - } - } -}; -} // namespace vm -} // namespace ext -} // namespace backend -} // namespace dpnp +void init_sqrt(py::module_ m); +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/sub.cpp b/dpnp/backend/extensions/vm/sub.cpp new file mode 100644 index 000000000000..4f5197469bb0 --- /dev/null +++ b/dpnp/backend/extensions/vm/sub.cpp @@ -0,0 +1,171 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//*****************************************************************************
+
+#include <oneapi/mkl.hpp>
+#include <sycl/sycl.hpp>
+
+#include "dpctl4pybind11.hpp"
+
+#include "common.hpp"
+#include "sub.hpp"
+
+// include a local copy of elementwise common header from dpctl tensor:
+// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp
+// TODO: replace by including dpctl header once available
+#include "../elementwise_functions/elementwise_functions.hpp"
+
+// dpctl tensor headers
+#include "kernels/elementwise_functions/common.hpp"
+#include "utils/type_dispatch.hpp"
+#include "utils/type_utils.hpp"
+
+namespace dpnp::extensions::vm
+{
+namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common;
+namespace py = pybind11;
+namespace py_int = dpnp::extensions::py_internal;
+namespace td_ns = dpctl::tensor::type_dispatch;
+namespace tu_ns = dpctl::tensor::type_utils;
+namespace vm_ext = dpnp::backend::ext::vm;
+
+namespace impl
+{
+// OneMKL namespace with VM functions
+namespace mkl_vm = oneapi::mkl::vm;
+
+/**
+ * @brief A factory to define pairs of supported types for which
+ * MKL VM library provides support in oneapi::mkl::vm::sub function.
+ *
+ * @tparam T Type of input vectors `a` and `b` and of result vector `y`.
+ */
+template <typename T1, typename T2>
+struct OutputType
+{
+    using value_type = typename std::disjunction<
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::complex<double>,
+                                        T2,
+                                        std::complex<double>,
+                                        std::complex<double>>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::complex<float>,
+                                        T2,
+                                        std::complex<float>,
+                                        std::complex<float>>,
+        td_ns::BinaryTypeMapResultEntry<T1, double, T2, double, double>,
+        td_ns::BinaryTypeMapResultEntry<T1, float, T2, float, float>,
+        td_ns::DefaultResultEntry<void>>::result_type;
+};
+
+template <typename T1, typename T2>
+static sycl::event sub_contig_impl(sycl::queue &exec_q,
+                                   std::size_t in_n,
+                                   const char *in_a,
+                                   ssize_t a_offset,
+                                   const char *in_b,
+                                   ssize_t b_offset,
+                                   char *out_y,
+                                   ssize_t out_offset,
+                                   const std::vector<sycl::event> &depends)
+{
+    tu_ns::validate_type_for_device<T1>(exec_q);
+    tu_ns::validate_type_for_device<T2>(exec_q);
+
+    if ((a_offset != 0) || (b_offset != 0) || (out_offset != 0)) {
+        throw std::runtime_error("Array offsets have to be equal to 0");
+    }
+
+    std::int64_t n = static_cast<std::int64_t>(in_n);
+    const T1 *a = reinterpret_cast<const T1 *>(in_a);
+    const T2 *b = reinterpret_cast<const T2 *>(in_b);
+
+    using resTy = typename OutputType<T1, T2>::value_type;
+    resTy *y = reinterpret_cast<resTy *>(out_y);
+
+    return mkl_vm::sub(exec_q,
+                       n, // number of elements to be calculated
+                       a, // pointer `a` containing 1st input vector of size n
+                       b, // pointer `b` containing 2nd input vector of size n
+                       y, // pointer `y` to the output vector of size n
+                       depends);
+}
+
+using ew_cmn_ns::binary_contig_impl_fn_ptr_t;
+using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t;
+using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t;
+using ew_cmn_ns::binary_strided_impl_fn_ptr_t;
+
+static int output_typeid_vector[td_ns::num_types][td_ns::num_types];
+static binary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types]
+                                                         [td_ns::num_types];
+
+MACRO_POPULATE_DISPATCH_TABLES(sub);
+} // namespace impl
+
+void init_sub(py::module_ m)
+{
+    using arrayT = dpctl::tensor::usm_ndarray;
+    using event_vecT = std::vector<sycl::event>;
+
+    impl::populate_dispatch_tables();
+    using impl::contig_dispatch_vector;
+    using impl::output_typeid_vector;
+
+    auto sub_pyapi = [&](sycl::queue exec_q, arrayT src1, arrayT src2,
+                         arrayT dst, const event_vecT &depends = {}) {
+        return py_int::py_binary_ufunc(
+            src1, src2, dst, exec_q, depends, output_typeid_vector,
+            contig_dispatch_vector,
+            // no support of strided implementation in OneMKL
+            td_ns::NullPtrTable<impl::binary_strided_impl_fn_ptr_t>{},
+            // no support of C-contig row with broadcasting in OneMKL
+            td_ns::NullPtrTable<
+                impl::
+                    binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{},
+            td_ns::NullPtrTable<
+                impl::
+                    binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{});
+    };
+    m.def("_sub", sub_pyapi,
+          "Call `sub` function from OneMKL VM library to perform element "
+          "by element subtraction of vector `src2` from vector `src1` "
+          "to resulting vector `dst`",
+          py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"),
+          py::arg("dst"), py::arg("depends") = py::list());
+
+    auto sub_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src1,
+                                      arrayT src2, arrayT dst) {
+        return vm_ext::need_to_call_binary_ufunc(exec_q, src1, src2, dst,
+                                                 output_typeid_vector,
+                                                 contig_dispatch_vector);
+    };
+    m.def("_mkl_sub_to_call", sub_need_to_call_pyapi,
+          "Check input arguments to answer if `sub` function from "
+          "OneMKL VM library can be used",
+          py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"),
+          py::arg("dst"));
+}
+} // namespace dpnp::extensions::vm
diff --git a/dpnp/backend/extensions/vm/sub.hpp b/dpnp/backend/extensions/vm/sub.hpp
index e1a2464b8675..059a78dcbda0 100644
--- a/dpnp/backend/extensions/vm/sub.hpp
+++ b/dpnp/backend/extensions/vm/sub.hpp
@@ -25,58 +25,11 @@
 #pragma once
-#include
+#include <pybind11/pybind11.h>
-#include "common.hpp"
-#include "types_matrix.hpp"
+namespace py = pybind11;
-namespace dpnp
+namespace dpnp::extensions::vm
 {
-namespace backend
-{
-namespace ext
-{
-namespace vm
-{
-template
-sycl::event sub_contig_impl(sycl::queue exec_q,
-                            const std::int64_t n,
-                            const char *in_a,
-                            const char *in_b,
-                            char *out_y,
-                            const std::vector &depends)
-{
-    type_utils::validate_type_for_device(exec_q);
-
-    const T *a = reinterpret_cast(in_a);
-    const T *b = reinterpret_cast(in_b);
-    using resTy = typename types::SubOutputType::value_type;
-    resTy *y = reinterpret_cast(out_y);
-
-    return mkl_vm::sub(exec_q,
-                       n, // number of elements to be calculated
-                       a, // pointer `a` containing 1st input vector of size n
-                       b, // pointer `b` containing 2nd input vector of size n
-                       y, // pointer `y` to the output vector of size n
-                       depends);
-}
-
-template
-struct SubContigFactory
-{
-    fnT get()
-    {
-        if constexpr (std::is_same_v<
-                          typename types::SubOutputType::value_type, void>)
-        {
-            return nullptr;
-        }
-        else {
-            return sub_contig_impl;
-        }
-    }
-};
-} // namespace vm
-} // namespace ext
-} // namespace backend
-} // namespace dpnp
+void init_sub(py::module_ m);
+} // namespace dpnp::extensions::vm
diff --git a/dpnp/backend/extensions/vm/tan.cpp b/dpnp/backend/extensions/vm/tan.cpp
new file mode 100644
index 000000000000..b21c49a4d995
--- /dev/null
+++ b/dpnp/backend/extensions/vm/tan.cpp
@@ -0,0 +1,139 @@
+//*****************************************************************************
+// Copyright (c) 2024, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include +#include + +#include "dpctl4pybind11.hpp" + +#include "common.hpp" +#include "tan.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::vm +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; +namespace vm_ext = dpnp::backend::ext::vm; + +namespace impl +{ +// OneMKL namespace with VM functions +namespace mkl_vm = oneapi::mkl::vm; + +/** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::tan function. + * + * @tparam T Type of input vector `a` and of result vector `y`. 
+ */ +template +struct OutputType +{ + using value_type = typename std::disjunction< + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +static sycl::event tan_contig_impl(sycl::queue &exec_q, + std::size_t in_n, + const char *in_a, + char *out_y, + const std::vector &depends) +{ + tu_ns::validate_type_for_device(exec_q); + + std::int64_t n = static_cast(in_n); + const T *a = reinterpret_cast(in_a); + + using resTy = typename OutputType::value_type; + resTy *y = reinterpret_cast(out_y); + + return mkl_vm::tan(exec_q, + n, // number of elements to be calculated + a, // pointer `a` containing input vector of size n + y, // pointer `y` to the output vector of size n + depends); +} + +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +static int output_typeid_vector[td_ns::num_types]; +static unary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types]; + +MACRO_POPULATE_DISPATCH_VECTORS(tan); +} // namespace impl + +void init_tan(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + impl::populate_dispatch_vectors(); + using impl::contig_dispatch_vector; + using impl::output_typeid_vector; + + auto tan_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, + const event_vecT &depends = {}) { + return py_int::py_unary_ufunc( + src, dst, exec_q, depends, output_typeid_vector, + contig_dispatch_vector, + // no support of strided implementation in OneMKL + td_ns::NullPtrVector{}); + }; + m.def("_tan", tan_pyapi, + "Call `tan` function from OneMKL VM library to compute " + "the tangent of vector elements", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), + py::arg("depends") = py::list()); + + auto tan_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, + arrayT dst) { + return vm_ext::need_to_call_unary_ufunc( + exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); + }; + m.def("_mkl_tan_to_call", tan_need_to_call_pyapi, + "Check input arguments to answer if `tan` function from " + "OneMKL VM library can be used", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); +} +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/tan.hpp b/dpnp/backend/extensions/vm/tan.hpp index d759ea46fe13..6fcfed9f8160 100644 --- a/dpnp/backend/extensions/vm/tan.hpp +++ b/dpnp/backend/extensions/vm/tan.hpp @@ -25,55 +25,11 @@ #pragma once -#include +#include -#include "common.hpp" -#include "types_matrix.hpp" +namespace py = pybind11; -namespace dpnp +namespace dpnp::extensions::vm { -namespace backend -{ -namespace ext -{ -namespace vm -{ -template -sycl::event tan_contig_impl(sycl::queue exec_q, - const std::int64_t n, - const char *in_a, - char *out_y, - const std::vector &depends) -{ - type_utils::validate_type_for_device(exec_q); - - const T *a = reinterpret_cast(in_a); - using resTy = typename types::TanOutputType::value_type; - resTy *y = reinterpret_cast(out_y); - - return mkl_vm::tan(exec_q, - n, // number of elements to be calculated - a, // pointer `a` containing input vector of size n - y, // pointer `y` to the output vector of size n - depends); -} - -template -struct TanContigFactory -{ - fnT get() - { - if constexpr (std::is_same_v< - typename types::TanOutputType::value_type, void>) - { - return nullptr; - } - else { - return tan_contig_impl; - } - } -}; -} // namespace vm -} // namespace ext -} // namespace backend -} // 
namespace dpnp +void init_tan(py::module_ m); +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/tanh.cpp b/dpnp/backend/extensions/vm/tanh.cpp new file mode 100644 index 000000000000..4a65b022c44f --- /dev/null +++ b/dpnp/backend/extensions/vm/tanh.cpp @@ -0,0 +1,139 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include +#include + +#include "dpctl4pybind11.hpp" + +#include "common.hpp" +#include "tanh.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::vm +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; +namespace vm_ext = dpnp::backend::ext::vm; + +namespace impl +{ +// OneMKL namespace with VM functions +namespace mkl_vm = oneapi::mkl::vm; + +/** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::tanh function. + * + * @tparam T Type of input vector `a` and of result vector `y`. 
+ */ +template +struct OutputType +{ + using value_type = typename std::disjunction< + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +static sycl::event tanh_contig_impl(sycl::queue &exec_q, + std::size_t in_n, + const char *in_a, + char *out_y, + const std::vector &depends) +{ + tu_ns::validate_type_for_device(exec_q); + + std::int64_t n = static_cast(in_n); + const T *a = reinterpret_cast(in_a); + + using resTy = typename OutputType::value_type; + resTy *y = reinterpret_cast(out_y); + + return mkl_vm::tanh(exec_q, + n, // number of elements to be calculated + a, // pointer `a` containing input vector of size n + y, // pointer `y` to the output vector of size n + depends); +} + +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +static int output_typeid_vector[td_ns::num_types]; +static unary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types]; + +MACRO_POPULATE_DISPATCH_VECTORS(tanh); +} // namespace impl + +void init_tanh(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + impl::populate_dispatch_vectors(); + using impl::contig_dispatch_vector; + using impl::output_typeid_vector; + + auto tanh_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, + const event_vecT &depends = {}) { + return py_int::py_unary_ufunc( + src, dst, exec_q, depends, output_typeid_vector, + contig_dispatch_vector, + // no support of strided implementation in OneMKL + td_ns::NullPtrVector{}); + }; + m.def("_tanh", tanh_pyapi, + "Call `tanh` function from OneMKL VM library to compute " + "the hyperbolic tangent of vector elements", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), + py::arg("depends") = py::list()); + + auto tanh_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, + arrayT dst) { + return vm_ext::need_to_call_unary_ufunc( + exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); + }; + m.def("_mkl_tanh_to_call", tanh_need_to_call_pyapi, + "Check input arguments to answer if `tanh` function from " + "OneMKL VM library can be used", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); +} +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/tanh.hpp b/dpnp/backend/extensions/vm/tanh.hpp index 98909685ff2b..9afbe1eb480b 100644 --- a/dpnp/backend/extensions/vm/tanh.hpp +++ b/dpnp/backend/extensions/vm/tanh.hpp @@ -25,55 +25,11 @@ #pragma once -#include +#include -#include "common.hpp" -#include "types_matrix.hpp" +namespace py = pybind11; -namespace dpnp +namespace dpnp::extensions::vm { -namespace backend -{ -namespace ext -{ -namespace vm -{ -template -sycl::event tanh_contig_impl(sycl::queue exec_q, - const std::int64_t n, - const char *in_a, - char *out_y, - const std::vector &depends) -{ - type_utils::validate_type_for_device(exec_q); - - const T *a = reinterpret_cast(in_a); - using resTy = typename types::TanhOutputType::value_type; - resTy *y = reinterpret_cast(out_y); - - return mkl_vm::tanh(exec_q, - n, // number of elements to be calculated - a, // pointer `a` containing input vector of size n - y, // pointer `y` to the output vector of size n - depends); -} - -template -struct TanhContigFactory -{ - fnT get() - { - if constexpr (std::is_same_v< - typename types::TanhOutputType::value_type, void>) - { - return nullptr; - } - else { - return tanh_contig_impl; - } - } -}; -} // namespace vm -} // namespace ext -} 
// namespace backend -} // namespace dpnp +void init_tanh(py::module_ m); +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/trunc.cpp b/dpnp/backend/extensions/vm/trunc.cpp new file mode 100644 index 000000000000..cb9fe25ccc3e --- /dev/null +++ b/dpnp/backend/extensions/vm/trunc.cpp @@ -0,0 +1,137 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include +#include + +#include "dpctl4pybind11.hpp" + +#include "common.hpp" +#include "trunc.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::vm +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; +namespace vm_ext = dpnp::backend::ext::vm; + +namespace impl +{ +// OneMKL namespace with VM functions +namespace mkl_vm = oneapi::mkl::vm; + +/** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::trunc function. + * + * @tparam T Type of input vector `a` and of result vector `y`. 
+ */ +template +struct OutputType +{ + using value_type = + typename std::disjunction, + td_ns::TypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +static sycl::event trunc_contig_impl(sycl::queue &exec_q, + std::size_t in_n, + const char *in_a, + char *out_y, + const std::vector &depends) +{ + tu_ns::validate_type_for_device(exec_q); + + std::int64_t n = static_cast(in_n); + const T *a = reinterpret_cast(in_a); + + using resTy = typename OutputType::value_type; + resTy *y = reinterpret_cast(out_y); + + return mkl_vm::trunc(exec_q, + n, // number of elements to be calculated + a, // pointer `a` containing input vector of size n + y, // pointer `y` to the output vector of size n + depends); +} + +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +static int output_typeid_vector[td_ns::num_types]; +static unary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types]; + +MACRO_POPULATE_DISPATCH_VECTORS(trunc); +} // namespace impl + +void init_trunc(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + impl::populate_dispatch_vectors(); + using impl::contig_dispatch_vector; + using impl::output_typeid_vector; + + auto trunc_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, + const event_vecT &depends = {}) { + return py_int::py_unary_ufunc( + src, dst, exec_q, depends, output_typeid_vector, + contig_dispatch_vector, + // no support of strided implementation in OneMKL + td_ns::NullPtrVector{}); + }; + m.def("_trunc", trunc_pyapi, + "Call `trunc` function from OneMKL VM library to compute " + "the truncated value of vector elements", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), + py::arg("depends") = py::list()); + + auto trunc_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, + arrayT dst) { + return vm_ext::need_to_call_unary_ufunc( + exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); + }; + m.def("_mkl_trunc_to_call", trunc_need_to_call_pyapi, + "Check input arguments to answer if `trunc` function from " + "OneMKL VM library can be used", + py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); +} +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/trunc.hpp b/dpnp/backend/extensions/vm/trunc.hpp index c06c7cf566fe..0b430fd1efc2 100644 --- a/dpnp/backend/extensions/vm/trunc.hpp +++ b/dpnp/backend/extensions/vm/trunc.hpp @@ -25,55 +25,11 @@ #pragma once -#include +#include -#include "common.hpp" -#include "types_matrix.hpp" +namespace py = pybind11; -namespace dpnp +namespace dpnp::extensions::vm { -namespace backend -{ -namespace ext -{ -namespace vm -{ -template -sycl::event trunc_contig_impl(sycl::queue exec_q, - const std::int64_t n, - const char *in_a, - char *out_y, - const std::vector &depends) -{ - type_utils::validate_type_for_device(exec_q); - - const T *a = reinterpret_cast(in_a); - using resTy = typename types::TruncOutputType::value_type; - resTy *y = reinterpret_cast(out_y); - - return mkl_vm::trunc(exec_q, - n, // number of elements to be calculated - a, // pointer `a` containing input vector of size n - y, // pointer `y` to the output vector of size n - depends); -} - -template -struct TruncContigFactory -{ - fnT get() - { - if constexpr (std::is_same_v< - typename types::TruncOutputType::value_type, void>) - { - return nullptr; - } - else { - return trunc_contig_impl; - } - } -}; -} // namespace vm -} // namespace ext -} // namespace backend -} // namespace dpnp +void 
init_trunc(py::module_ m); +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/types_matrix.hpp b/dpnp/backend/extensions/vm/types_matrix.hpp index 6336b14f7158..35892d887eaf 100644 --- a/dpnp/backend/extensions/vm/types_matrix.hpp +++ b/dpnp/backend/extensions/vm/types_matrix.hpp @@ -43,97 +43,6 @@ namespace vm { namespace types { -/** - * @brief A factory to define pairs of supported types for which - * MKL VM library provides support in oneapi::mkl::vm::sqr function. - * - * @tparam T Type of input vector `a` and of result vector `y`. - */ -template -struct SqrOutputType -{ - using value_type = typename std::disjunction< - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::DefaultResultEntry>::result_type; -}; - -/** - * @brief A factory to define pairs of supported types for which - * MKL VM library provides support in oneapi::mkl::vm::sqrt function. - * - * @tparam T Type of input vector `a` and of result vector `y`. - */ -template -struct SqrtOutputType -{ - using value_type = typename std::disjunction< - dpctl_td_ns::TypeMapResultEntry>, - dpctl_td_ns::TypeMapResultEntry>, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::DefaultResultEntry>::result_type; -}; - -/** - * @brief A factory to define pairs of supported types for which - * MKL VM library provides support in oneapi::mkl::vm::sub function. - * - * @tparam T Type of input vectors `a` and `b` and of result vector `y`. - */ -template -struct SubOutputType -{ - using value_type = typename std::disjunction< - dpctl_td_ns::BinaryTypeMapResultEntry, - T, - std::complex, - std::complex>, - dpctl_td_ns::BinaryTypeMapResultEntry, - T, - std::complex, - std::complex>, - dpctl_td_ns::BinaryTypeMapResultEntry, - dpctl_td_ns::BinaryTypeMapResultEntry, - dpctl_td_ns::DefaultResultEntry>::result_type; -}; - -/** - * @brief A factory to define pairs of supported types for which - * MKL VM library provides support in oneapi::mkl::vm::tan function. - * - * @tparam T Type of input vector `a` and of result vector `y`. - */ -template -struct TanOutputType -{ - using value_type = typename std::disjunction< - dpctl_td_ns::TypeMapResultEntry>, - dpctl_td_ns::TypeMapResultEntry>, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::DefaultResultEntry>::result_type; -}; - -/** - * @brief A factory to define pairs of supported types for which - * MKL VM library provides support in oneapi::mkl::vm::tanh function. - * - * @tparam T Type of input vector `a` and of result vector `y`. - */ -template -struct TanhOutputType -{ - using value_type = typename std::disjunction< - dpctl_td_ns::TypeMapResultEntry>, - dpctl_td_ns::TypeMapResultEntry>, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::DefaultResultEntry>::result_type; -}; - /** * @brief A factory to define pairs of supported types for which * MKL VM library provides support in oneapi::mkl::vm::trunc function. 
diff --git a/dpnp/backend/extensions/vm/vm_py.cpp b/dpnp/backend/extensions/vm/vm_py.cpp index f1ce3d6ea86b..0e6030a4c706 100644 --- a/dpnp/backend/extensions/vm/vm_py.cpp +++ b/dpnp/backend/extensions/vm/vm_py.cpp @@ -27,9 +27,6 @@ // //***************************************************************************** -#include -#include - #include "abs.hpp" #include "acos.hpp" #include "acosh.hpp" @@ -66,27 +63,11 @@ #include "tan.hpp" #include "tanh.hpp" #include "trunc.hpp" -#include "types_matrix.hpp" -namespace py = pybind11; -namespace vm_ext = dpnp::backend::ext::vm; namespace vm_ns = dpnp::extensions::vm; -using vm_ext::binary_impl_fn_ptr_t; -using vm_ext::unary_impl_fn_ptr_t; - -static unary_impl_fn_ptr_t sqr_dispatch_vector[dpctl_td_ns::num_types]; -static unary_impl_fn_ptr_t sqrt_dispatch_vector[dpctl_td_ns::num_types]; -static binary_impl_fn_ptr_t sub_dispatch_vector[dpctl_td_ns::num_types]; -static unary_impl_fn_ptr_t tan_dispatch_vector[dpctl_td_ns::num_types]; -static unary_impl_fn_ptr_t tanh_dispatch_vector[dpctl_td_ns::num_types]; -static unary_impl_fn_ptr_t trunc_dispatch_vector[dpctl_td_ns::num_types]; - PYBIND11_MODULE(_vm_impl, m) { - using arrayT = dpctl::tensor::usm_ndarray; - using event_vecT = std::vector; - vm_ns::init_abs(m); vm_ns::init_acos(m); vm_ns::init_acosh(m); @@ -116,177 +97,10 @@ PYBIND11_MODULE(_vm_impl, m) vm_ns::init_rint(m); vm_ns::init_sin(m); vm_ns::init_sinh(m); - - // UnaryUfunc: ==== Sqr(x) ==== - { - vm_ext::init_ufunc_dispatch_vector( - sqr_dispatch_vector); - - auto sqr_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { - return vm_ext::unary_ufunc(exec_q, src, dst, depends, - sqr_dispatch_vector); - }; - m.def( - "_sqr", sqr_pyapi, - "Call `sqr` from OneMKL VM library to performs element by element " - "operation of squaring of vector `src` to resulting vector `dst`", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), - py::arg("depends") = py::list()); - - auto sqr_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { - return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, - sqr_dispatch_vector); - }; - m.def("_mkl_sqr_to_call", sqr_need_to_call_pyapi, - "Check input arguments to answer if `sqr` function from " - "OneMKL VM library can be used", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); - } - - // UnaryUfunc: ==== Sqrt(x) ==== - { - vm_ext::init_ufunc_dispatch_vector( - sqrt_dispatch_vector); - - auto sqrt_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { - return vm_ext::unary_ufunc(exec_q, src, dst, depends, - sqrt_dispatch_vector); - }; - m.def( - "_sqrt", sqrt_pyapi, - "Call `sqrt` from OneMKL VM library to performs element by element " - "operation of extracting the square root " - "of vector `src` to resulting vector `dst`", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), - py::arg("depends") = py::list()); - - auto sqrt_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { - return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, - sqrt_dispatch_vector); - }; - m.def("_mkl_sqrt_to_call", sqrt_need_to_call_pyapi, - "Check input arguments to answer if `sqrt` function from " - "OneMKL VM library can be used", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); - } - - // BinaryUfunc: ==== Sub(x1, x2) ==== - { - vm_ext::init_ufunc_dispatch_vector( - sub_dispatch_vector); - - auto sub_pyapi = [&](sycl::queue exec_q, arrayT src1, arrayT src2, - arrayT dst, const event_vecT &depends 
= {}) { - return vm_ext::binary_ufunc(exec_q, src1, src2, dst, depends, - sub_dispatch_vector); - }; - m.def("_sub", sub_pyapi, - "Call `sub` function from OneMKL VM library to performs element " - "by element subtraction of vector `src1` by vector `src2` " - "to resulting vector `dst`", - py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("depends") = py::list()); - - auto sub_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src1, - arrayT src2, arrayT dst) { - return vm_ext::need_to_call_binary_ufunc(exec_q, src1, src2, dst, - sub_dispatch_vector); - }; - m.def("_mkl_sub_to_call", sub_need_to_call_pyapi, - "Check input arguments to answer if `sub` function from " - "OneMKL VM library can be used", - py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), - py::arg("dst")); - } - - // UnaryUfunc: ==== Tan(x) ==== - { - vm_ext::init_ufunc_dispatch_vector( - tan_dispatch_vector); - - auto tan_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { - return vm_ext::unary_ufunc(exec_q, src, dst, depends, - tan_dispatch_vector); - }; - m.def("_tan", tan_pyapi, - "Call `tan` function from OneMKL VM library to compute " - "tangent of vector elements", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), - py::arg("depends") = py::list()); - - auto tan_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { - return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, - tan_dispatch_vector); - }; - m.def("_mkl_tan_to_call", tan_need_to_call_pyapi, - "Check input arguments to answer if `tan` function from " - "OneMKL VM library can be used", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); - } - - // UnaryUfunc: ==== Tanh(x) ==== - { - vm_ext::init_ufunc_dispatch_vector( - tanh_dispatch_vector); - - auto tanh_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { - return vm_ext::unary_ufunc(exec_q, src, dst, depends, - tanh_dispatch_vector); - }; - m.def("_tanh", tanh_pyapi, - "Call `tanh` function from OneMKL VM library to compute " - "inverse cosine of vector elements", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), - py::arg("depends") = py::list()); - - auto tanh_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { - return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, - tanh_dispatch_vector); - }; - m.def("_mkl_tanh_to_call", tanh_need_to_call_pyapi, - "Check input arguments to answer if `tanh` function from " - "OneMKL VM library can be used", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); - } - - // UnaryUfunc: ==== Trunc(x) ==== - { - vm_ext::init_ufunc_dispatch_vector( - trunc_dispatch_vector); - - auto trunc_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { - return vm_ext::unary_ufunc(exec_q, src, dst, depends, - trunc_dispatch_vector); - }; - m.def("_trunc", trunc_pyapi, - "Call `trunc` function from OneMKL VM library to compute " - "the truncated value of vector elements", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), - py::arg("depends") = py::list()); - - auto trunc_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { - return vm_ext::need_to_call_unary_ufunc(exec_q, src, dst, - trunc_dispatch_vector); - }; - m.def("_mkl_trunc_to_call", trunc_need_to_call_pyapi, - "Check input arguments to answer if `trunc` function from " - "OneMKL VM library can be used", - py::arg("sycl_queue"), py::arg("src"), py::arg("dst")); - } + 
vm_ns::init_sqr(m); + vm_ns::init_sqrt(m); + vm_ns::init_sub(m); + vm_ns::init_tan(m); + vm_ns::init_tanh(m); + vm_ns::init_trunc(m); } From fda9ec43a154fb8da84bfab07e5b6e725280f488 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Tue, 11 Jun 2024 14:39:35 +0200 Subject: [PATCH 19/35] Removed unused header with types matrix --- dpnp/backend/extensions/vm/types_matrix.hpp | 65 --------------------- 1 file changed, 65 deletions(-) delete mode 100644 dpnp/backend/extensions/vm/types_matrix.hpp diff --git a/dpnp/backend/extensions/vm/types_matrix.hpp b/dpnp/backend/extensions/vm/types_matrix.hpp deleted file mode 100644 index 35892d887eaf..000000000000 --- a/dpnp/backend/extensions/vm/types_matrix.hpp +++ /dev/null @@ -1,65 +0,0 @@ -//***************************************************************************** -// Copyright (c) 2023-2024, Intel Corporation -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -// THE POSSIBILITY OF SUCH DAMAGE. -//***************************************************************************** - -#pragma once - -#include - -// dpctl tensor headers -#include "utils/type_dispatch.hpp" - -// dpctl namespace for types dispatching -namespace dpctl_td_ns = dpctl::tensor::type_dispatch; - -namespace dpnp -{ -namespace backend -{ -namespace ext -{ -namespace vm -{ -namespace types -{ -/** - * @brief A factory to define pairs of supported types for which - * MKL VM library provides support in oneapi::mkl::vm::trunc function. - * - * @tparam T Type of input vector `a` and of result vector `y`. 
- */ -template -struct TruncOutputType -{ - using value_type = typename std::disjunction< - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::TypeMapResultEntry, - dpctl_td_ns::DefaultResultEntry>::result_type; -}; - -} // namespace types -} // namespace vm -} // namespace ext -} // namespace backend -} // namespace dpnp From 49ffe40d31af4fd2d2316977f5ce9c60658d186c Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Tue, 11 Jun 2024 15:13:37 +0200 Subject: [PATCH 20/35] Remove unused functions --- dpnp/backend/extensions/vm/abs.cpp | 3 +- dpnp/backend/extensions/vm/acos.cpp | 3 +- dpnp/backend/extensions/vm/acosh.cpp | 3 +- dpnp/backend/extensions/vm/add.cpp | 7 +- dpnp/backend/extensions/vm/asin.cpp | 3 +- dpnp/backend/extensions/vm/asinh.cpp | 3 +- dpnp/backend/extensions/vm/atan.cpp | 3 +- dpnp/backend/extensions/vm/atan2.cpp | 7 +- dpnp/backend/extensions/vm/atanh.cpp | 3 +- dpnp/backend/extensions/vm/cbrt.cpp | 3 +- dpnp/backend/extensions/vm/ceil.cpp | 3 +- dpnp/backend/extensions/vm/common.hpp | 421 +------------------------- dpnp/backend/extensions/vm/conj.cpp | 3 +- dpnp/backend/extensions/vm/cos.cpp | 3 +- dpnp/backend/extensions/vm/cosh.cpp | 3 +- dpnp/backend/extensions/vm/div.cpp | 7 +- dpnp/backend/extensions/vm/exp.cpp | 3 +- dpnp/backend/extensions/vm/exp2.cpp | 3 +- dpnp/backend/extensions/vm/expm1.cpp | 3 +- dpnp/backend/extensions/vm/floor.cpp | 3 +- dpnp/backend/extensions/vm/hypot.cpp | 7 +- dpnp/backend/extensions/vm/ln.cpp | 3 +- dpnp/backend/extensions/vm/log10.cpp | 3 +- dpnp/backend/extensions/vm/log1p.cpp | 3 +- dpnp/backend/extensions/vm/log2.cpp | 3 +- dpnp/backend/extensions/vm/mul.cpp | 7 +- dpnp/backend/extensions/vm/pow.cpp | 7 +- dpnp/backend/extensions/vm/rint.cpp | 3 +- dpnp/backend/extensions/vm/sin.cpp | 3 +- dpnp/backend/extensions/vm/sinh.cpp | 3 +- dpnp/backend/extensions/vm/sqr.cpp | 3 +- dpnp/backend/extensions/vm/sqrt.cpp | 3 +- dpnp/backend/extensions/vm/sub.cpp | 7 +- dpnp/backend/extensions/vm/tan.cpp | 3 +- dpnp/backend/extensions/vm/tanh.cpp | 3 +- dpnp/backend/extensions/vm/trunc.cpp | 3 +- dpnp/backend/extensions/vm/vm_py.cpp | 1 - 37 files changed, 65 insertions(+), 490 deletions(-) diff --git a/dpnp/backend/extensions/vm/abs.cpp b/dpnp/backend/extensions/vm/abs.cpp index 4b0b081f0790..5052c6459b47 100644 --- a/dpnp/backend/extensions/vm/abs.cpp +++ b/dpnp/backend/extensions/vm/abs.cpp @@ -48,7 +48,6 @@ namespace py = pybind11; namespace py_int = dpnp::extensions::py_internal; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; -namespace vm_ext = dpnp::backend::ext::vm; namespace impl { @@ -128,7 +127,7 @@ void init_abs(py::module_ m) auto abs_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst) { - return vm_ext::need_to_call_unary_ufunc( + return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; m.def("_mkl_abs_to_call", abs_need_to_call_pyapi, diff --git a/dpnp/backend/extensions/vm/acos.cpp b/dpnp/backend/extensions/vm/acos.cpp index 3e99ddd38ab8..d7b41cb15e5d 100644 --- a/dpnp/backend/extensions/vm/acos.cpp +++ b/dpnp/backend/extensions/vm/acos.cpp @@ -48,7 +48,6 @@ namespace py = pybind11; namespace py_int = dpnp::extensions::py_internal; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; -namespace vm_ext = dpnp::backend::ext::vm; namespace impl { @@ -128,7 +127,7 @@ void init_acos(py::module_ m) auto acos_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst) { - 
return vm_ext::need_to_call_unary_ufunc( + return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; m.def("_mkl_acos_to_call", acos_need_to_call_pyapi, diff --git a/dpnp/backend/extensions/vm/acosh.cpp b/dpnp/backend/extensions/vm/acosh.cpp index 626f6eb17b6c..e44ae8b62c9f 100644 --- a/dpnp/backend/extensions/vm/acosh.cpp +++ b/dpnp/backend/extensions/vm/acosh.cpp @@ -48,7 +48,6 @@ namespace py = pybind11; namespace py_int = dpnp::extensions::py_internal; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; -namespace vm_ext = dpnp::backend::ext::vm; namespace impl { @@ -128,7 +127,7 @@ void init_acosh(py::module_ m) auto acosh_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst) { - return vm_ext::need_to_call_unary_ufunc( + return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; m.def("_mkl_acosh_to_call", acosh_need_to_call_pyapi, diff --git a/dpnp/backend/extensions/vm/add.cpp b/dpnp/backend/extensions/vm/add.cpp index 856ed669e5d8..ea03cc9bb825 100644 --- a/dpnp/backend/extensions/vm/add.cpp +++ b/dpnp/backend/extensions/vm/add.cpp @@ -48,7 +48,6 @@ namespace py = pybind11; namespace py_int = dpnp::extensions::py_internal; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; -namespace vm_ext = dpnp::backend::ext::vm; namespace impl { @@ -158,9 +157,9 @@ void init_add(py::module_ m) auto add_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src1, arrayT src2, arrayT dst) { - return vm_ext::need_to_call_binary_ufunc(exec_q, src1, src2, dst, - output_typeid_vector, - contig_dispatch_vector); + return py_internal::need_to_call_binary_ufunc(exec_q, src1, src2, dst, + output_typeid_vector, + contig_dispatch_vector); }; m.def("_mkl_add_to_call", add_need_to_call_pyapi, "Check input arguments to answer if `add` function from " diff --git a/dpnp/backend/extensions/vm/asin.cpp b/dpnp/backend/extensions/vm/asin.cpp index b00b9a55ad42..e347f69eb4e9 100644 --- a/dpnp/backend/extensions/vm/asin.cpp +++ b/dpnp/backend/extensions/vm/asin.cpp @@ -48,7 +48,6 @@ namespace py = pybind11; namespace py_int = dpnp::extensions::py_internal; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; -namespace vm_ext = dpnp::backend::ext::vm; namespace impl { @@ -128,7 +127,7 @@ void init_asin(py::module_ m) auto asin_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst) { - return vm_ext::need_to_call_unary_ufunc( + return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; m.def("_mkl_asin_to_call", asin_need_to_call_pyapi, diff --git a/dpnp/backend/extensions/vm/asinh.cpp b/dpnp/backend/extensions/vm/asinh.cpp index a21cf0d78bde..46ab7d6e03d8 100644 --- a/dpnp/backend/extensions/vm/asinh.cpp +++ b/dpnp/backend/extensions/vm/asinh.cpp @@ -48,7 +48,6 @@ namespace py = pybind11; namespace py_int = dpnp::extensions::py_internal; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; -namespace vm_ext = dpnp::backend::ext::vm; namespace impl { @@ -128,7 +127,7 @@ void init_asinh(py::module_ m) auto asinh_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst) { - return vm_ext::need_to_call_unary_ufunc( + return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; m.def("_mkl_asinh_to_call", 
asinh_need_to_call_pyapi, diff --git a/dpnp/backend/extensions/vm/atan.cpp b/dpnp/backend/extensions/vm/atan.cpp index cdcf437fcb06..76f367981bf9 100644 --- a/dpnp/backend/extensions/vm/atan.cpp +++ b/dpnp/backend/extensions/vm/atan.cpp @@ -48,7 +48,6 @@ namespace py = pybind11; namespace py_int = dpnp::extensions::py_internal; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; -namespace vm_ext = dpnp::backend::ext::vm; namespace impl { @@ -128,7 +127,7 @@ void init_atan(py::module_ m) auto atan_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst) { - return vm_ext::need_to_call_unary_ufunc( + return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; m.def("_mkl_atan_to_call", atan_need_to_call_pyapi, diff --git a/dpnp/backend/extensions/vm/atan2.cpp b/dpnp/backend/extensions/vm/atan2.cpp index ff58b81d52e6..3da53a8b48af 100644 --- a/dpnp/backend/extensions/vm/atan2.cpp +++ b/dpnp/backend/extensions/vm/atan2.cpp @@ -48,7 +48,6 @@ namespace py = pybind11; namespace py_int = dpnp::extensions::py_internal; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; -namespace vm_ext = dpnp::backend::ext::vm; namespace impl { @@ -147,9 +146,9 @@ void init_atan2(py::module_ m) auto atan2_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src1, arrayT src2, arrayT dst) { - return vm_ext::need_to_call_binary_ufunc(exec_q, src1, src2, dst, - output_typeid_vector, - contig_dispatch_vector); + return py_internal::need_to_call_binary_ufunc(exec_q, src1, src2, dst, + output_typeid_vector, + contig_dispatch_vector); }; m.def("_mkl_atan2_to_call", atan2_need_to_call_pyapi, "Check input arguments to answer if `atan2` function from " diff --git a/dpnp/backend/extensions/vm/atanh.cpp b/dpnp/backend/extensions/vm/atanh.cpp index 9ab2b56332a6..1fcaed973052 100644 --- a/dpnp/backend/extensions/vm/atanh.cpp +++ b/dpnp/backend/extensions/vm/atanh.cpp @@ -48,7 +48,6 @@ namespace py = pybind11; namespace py_int = dpnp::extensions::py_internal; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; -namespace vm_ext = dpnp::backend::ext::vm; namespace impl { @@ -128,7 +127,7 @@ void init_atanh(py::module_ m) auto atanh_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst) { - return vm_ext::need_to_call_unary_ufunc( + return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; m.def("_mkl_atanh_to_call", atanh_need_to_call_pyapi, diff --git a/dpnp/backend/extensions/vm/cbrt.cpp b/dpnp/backend/extensions/vm/cbrt.cpp index 4e662caa1746..34e0d786caf3 100644 --- a/dpnp/backend/extensions/vm/cbrt.cpp +++ b/dpnp/backend/extensions/vm/cbrt.cpp @@ -48,7 +48,6 @@ namespace py = pybind11; namespace py_int = dpnp::extensions::py_internal; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; -namespace vm_ext = dpnp::backend::ext::vm; namespace impl { @@ -126,7 +125,7 @@ void init_cbrt(py::module_ m) auto cbrt_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst) { - return vm_ext::need_to_call_unary_ufunc( + return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; m.def("_mkl_cbrt_to_call", cbrt_need_to_call_pyapi, diff --git a/dpnp/backend/extensions/vm/ceil.cpp b/dpnp/backend/extensions/vm/ceil.cpp index e4e7136b764f..d51fe6cc442a 100644 --- 
a/dpnp/backend/extensions/vm/ceil.cpp +++ b/dpnp/backend/extensions/vm/ceil.cpp @@ -48,7 +48,6 @@ namespace py = pybind11; namespace py_int = dpnp::extensions::py_internal; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; -namespace vm_ext = dpnp::backend::ext::vm; namespace impl { @@ -126,7 +125,7 @@ void init_ceil(py::module_ m) auto ceil_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst) { - return vm_ext::need_to_call_unary_ufunc( + return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; m.def("_mkl_ceil_to_call", ceil_need_to_call_pyapi, diff --git a/dpnp/backend/extensions/vm/common.hpp b/dpnp/backend/extensions/vm/common.hpp index e2fcc9e2dde0..3db084740615 100644 --- a/dpnp/backend/extensions/vm/common.hpp +++ b/dpnp/backend/extensions/vm/common.hpp @@ -25,415 +25,26 @@ #pragma once -#include #include +#include #include -#include #include // dpctl tensor headers #include "utils/memory_overlap.hpp" #include "utils/type_dispatch.hpp" -#include "utils/type_utils.hpp" #include "dpnp_utils.hpp" static_assert(INTEL_MKL_VERSION >= __INTEL_MKL_2023_2_0_VERSION_REQUIRED, "OneMKL does not meet minimum version requirement"); -// OneMKL namespace with VM functions -namespace mkl_vm = oneapi::mkl::vm; - -// dpctl namespace for type utils -namespace type_utils = dpctl::tensor::type_utils; - -namespace dpnp::backend::ext::vm -{ -typedef sycl::event (*unary_impl_fn_ptr_t)(sycl::queue, - const std::int64_t, - const char *, - char *, - const std::vector &); - -typedef sycl::event (*binary_impl_fn_ptr_t)(sycl::queue, - const std::int64_t, - const char *, - const char *, - char *, - const std::vector &); - -namespace dpctl_td_ns = dpctl::tensor::type_dispatch; namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; -template -std::pair - unary_ufunc(sycl::queue exec_q, - dpctl::tensor::usm_ndarray src, - dpctl::tensor::usm_ndarray dst, // dst = op(src), elementwise - const std::vector &depends, - const dispatchT &dispatch_vector) -{ - // check type_nums - int src_typenum = src.get_typenum(); - auto array_types = dpctl_td_ns::usm_ndarray_types(); - int src_typeid = array_types.typenum_to_lookup_id(src_typenum); - - // check that queues are compatible - if (!dpctl::utils::queues_are_compatible(exec_q, {src, dst})) { - throw py::value_error( - "Execution queue is not compatible with allocation queues."); - } - - // check that dimensions are the same - int dst_nd = dst.get_ndim(); - if (dst_nd != src.get_ndim()) { - throw py::value_error( - "Input and output arrays have have different dimensions."); - } - - // check that shapes are the same - const py::ssize_t *src_shape = src.get_shape_raw(); - const py::ssize_t *dst_shape = dst.get_shape_raw(); - bool shapes_equal(true); - size_t src_nelems(1); - - for (int i = 0; i < dst_nd; ++i) { - src_nelems *= static_cast(src_shape[i]); - shapes_equal = shapes_equal && (src_shape[i] == dst_shape[i]); - } - if (!shapes_equal) { - throw py::value_error("Input and output arrays have different shapes."); - } - - // if nelems is zero, return - if (src_nelems == 0) { - return std::make_pair(sycl::event(), sycl::event()); - } - - // ensure that output is ample enough to accommodate all elements - auto dst_offsets = dst.get_minmax_offsets(); - // destination must be ample enough to accommodate all elements - { - size_t range = - static_cast(dst_offsets.second - dst_offsets.first); - if (range + 1 < src_nelems) { - throw py::value_error( 
- "Destination array can not accommodate all the elements " - "of source array."); - } - } - - // check memory overlap - auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); - if (overlap(src, dst)) { - throw py::value_error("Arrays index overlapping segments of memory."); - } - - const char *src_data = src.get_data(); - char *dst_data = dst.get_data(); - - // handle contiguous inputs - bool is_src_c_contig = src.is_c_contiguous(); - bool is_dst_c_contig = dst.is_c_contiguous(); - - bool all_c_contig = (is_src_c_contig && is_dst_c_contig); - if (!all_c_contig) { - throw py::value_error("Input and outpur arrays must be C-contiguous."); - } - - auto dispatch_fn = dispatch_vector[src_typeid]; - if (dispatch_fn == nullptr) { - throw py::value_error("No implementation is defined for ufunc."); - } - sycl::event comp_ev = - dispatch_fn(exec_q, src_nelems, src_data, dst_data, depends); - - sycl::event ht_ev = - dpctl::utils::keep_args_alive(exec_q, {src, dst}, {comp_ev}); - return std::make_pair(ht_ev, comp_ev); -} - -template -std::pair binary_ufunc( - sycl::queue exec_q, - dpctl::tensor::usm_ndarray src1, - dpctl::tensor::usm_ndarray src2, - dpctl::tensor::usm_ndarray dst, // dst = op(src1, src2), elementwise - const std::vector &depends, - const dispatchT &dispatch_vector) -{ - // check type_nums - int src1_typenum = src1.get_typenum(); - int src2_typenum = src2.get_typenum(); - - auto array_types = dpctl_td_ns::usm_ndarray_types(); - int src1_typeid = array_types.typenum_to_lookup_id(src1_typenum); - int src2_typeid = array_types.typenum_to_lookup_id(src2_typenum); - - if (src1_typeid != src2_typeid) { - throw py::value_error("Input arrays have different types."); - } - - // check that queues are compatible - if (!dpctl::utils::queues_are_compatible(exec_q, {src1, src2, dst})) { - throw py::value_error( - "Execution queue is not compatible with allocation queues."); - } - - // check shapes, broadcasting is assumed done by caller - // check that dimensions are the same - int dst_nd = dst.get_ndim(); - if (dst_nd != src1.get_ndim() || dst_nd != src2.get_ndim()) { - throw py::value_error("Array dimensions are not the same."); - } - - // check that shapes are the same - const py::ssize_t *src1_shape = src1.get_shape_raw(); - const py::ssize_t *src2_shape = src2.get_shape_raw(); - const py::ssize_t *dst_shape = dst.get_shape_raw(); - bool shapes_equal(true); - size_t src_nelems(1); - - for (int i = 0; i < dst_nd; ++i) { - src_nelems *= static_cast(src1_shape[i]); - shapes_equal = shapes_equal && (src1_shape[i] == dst_shape[i] && - src2_shape[i] == dst_shape[i]); - } - if (!shapes_equal) { - throw py::value_error("Array shapes are not the same."); - } - - // if nelems is zero, return - if (src_nelems == 0) { - return std::make_pair(sycl::event(), sycl::event()); - } - - // ensure that output is ample enough to accommodate all elements - auto dst_offsets = dst.get_minmax_offsets(); - // destination must be ample enough to accommodate all elements - { - size_t range = - static_cast(dst_offsets.second - dst_offsets.first); - if (range + 1 < src_nelems) { - throw py::value_error( - "Destination array can not accommodate all the " - "elements of source array."); - } - } - - // check memory overlap - auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); - if (overlap(src1, dst) || overlap(src2, dst)) { - throw py::value_error("Arrays index overlapping segments of memory."); - } - - const char *src1_data = src1.get_data(); - const char *src2_data = src2.get_data(); - char *dst_data = 
dst.get_data(); - - // handle contiguous inputs - bool is_src1_c_contig = src1.is_c_contiguous(); - bool is_src2_c_contig = src2.is_c_contiguous(); - bool is_dst_c_contig = dst.is_c_contiguous(); - - bool all_c_contig = - (is_src1_c_contig && is_src2_c_contig && is_dst_c_contig); - if (!all_c_contig) { - throw py::value_error("Input and outpur arrays must be C-contiguous."); - } - - auto dispatch_fn = dispatch_vector[src1_typeid]; - if (dispatch_fn == nullptr) { - throw py::value_error("No implementation is defined for ufunc."); - } - sycl::event comp_ev = dispatch_fn(exec_q, src_nelems, src1_data, src2_data, - dst_data, depends); - - sycl::event ht_ev = - dpctl::utils::keep_args_alive(exec_q, {src1, src2, dst}, {comp_ev}); - return std::make_pair(ht_ev, comp_ev); -} - -template -bool need_to_call_unary_ufunc(sycl::queue exec_q, - dpctl::tensor::usm_ndarray src, - dpctl::tensor::usm_ndarray dst, - const dispatchT &dispatch_vector) -{ - // check type_nums - int src_typenum = src.get_typenum(); - auto array_types = dpctl_td_ns::usm_ndarray_types(); - int src_typeid = array_types.typenum_to_lookup_id(src_typenum); - - // OneMKL VM functions perform a copy on host if no double type support - if (!exec_q.get_device().has(sycl::aspect::fp64)) { - return false; - } - - // check that queues are compatible - if (!dpctl::utils::queues_are_compatible(exec_q, {src, dst})) { - return false; - } - - // dimensions must be the same - int dst_nd = dst.get_ndim(); - if (dst_nd != src.get_ndim()) { - return false; - } - else if (dst_nd == 0) { - // don't call OneMKL for 0d arrays - return false; - } - - // shapes must be the same - const py::ssize_t *src_shape = src.get_shape_raw(); - const py::ssize_t *dst_shape = dst.get_shape_raw(); - bool shapes_equal(true); - size_t src_nelems(1); - - for (int i = 0; i < dst_nd; ++i) { - src_nelems *= static_cast(src_shape[i]); - shapes_equal = shapes_equal && (src_shape[i] == dst_shape[i]); - } - if (!shapes_equal) { - return false; - } - - // if nelems is zero, return false - if (src_nelems == 0) { - return false; - } - - // ensure that output is ample enough to accommodate all elements - auto dst_offsets = dst.get_minmax_offsets(); - // destination must be ample enough to accommodate all elements - { - size_t range = - static_cast(dst_offsets.second - dst_offsets.first); - if (range + 1 < src_nelems) { - return false; - } - } - - // check memory overlap - auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); - if (overlap(src, dst)) { - return false; - } - - // support only contiguous inputs - bool is_src_c_contig = src.is_c_contiguous(); - bool is_dst_c_contig = dst.is_c_contiguous(); - - bool all_c_contig = (is_src_c_contig && is_dst_c_contig); - if (!all_c_contig) { - return false; - } - - // MKL function is not defined for the type - if (dispatch_vector[src_typeid] == nullptr) { - return false; - } - return true; -} - -template -bool need_to_call_binary_ufunc(sycl::queue exec_q, - dpctl::tensor::usm_ndarray src1, - dpctl::tensor::usm_ndarray src2, - dpctl::tensor::usm_ndarray dst, - const dispatchT &dispatch_vector) +namespace dpnp::extensions::vm::py_internal { - // check type_nums - int src1_typenum = src1.get_typenum(); - int src2_typenum = src2.get_typenum(); - - auto array_types = dpctl_td_ns::usm_ndarray_types(); - int src1_typeid = array_types.typenum_to_lookup_id(src1_typenum); - int src2_typeid = array_types.typenum_to_lookup_id(src2_typenum); - - // types must be the same - if (src1_typeid != src2_typeid) { - return false; - } - - // OneMKL VM 
functions perform a copy on host if no double type support - if (!exec_q.get_device().has(sycl::aspect::fp64)) { - return false; - } - - // check that queues are compatible - if (!dpctl::utils::queues_are_compatible(exec_q, {src1, src2, dst})) { - return false; - } - - // dimensions must be the same - int dst_nd = dst.get_ndim(); - if (dst_nd != src1.get_ndim() || dst_nd != src2.get_ndim()) { - return false; - } - else if (dst_nd == 0) { - // don't call OneMKL for 0d arrays - return false; - } - - // shapes must be the same - const py::ssize_t *src1_shape = src1.get_shape_raw(); - const py::ssize_t *src2_shape = src2.get_shape_raw(); - const py::ssize_t *dst_shape = dst.get_shape_raw(); - bool shapes_equal(true); - size_t src_nelems(1); - - for (int i = 0; i < dst_nd; ++i) { - src_nelems *= static_cast(src1_shape[i]); - shapes_equal = shapes_equal && (src1_shape[i] == dst_shape[i] && - src2_shape[i] == dst_shape[i]); - } - if (!shapes_equal) { - return false; - } - - // if nelems is zero, return false - if (src_nelems == 0) { - return false; - } - - // ensure that output is ample enough to accommodate all elements - auto dst_offsets = dst.get_minmax_offsets(); - // destination must be ample enough to accommodate all elements - { - size_t range = - static_cast(dst_offsets.second - dst_offsets.first); - if (range + 1 < src_nelems) { - return false; - } - } - - // check memory overlap - auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); - if (overlap(src1, dst) || overlap(src2, dst)) { - return false; - } - - // support only contiguous inputs - bool is_src1_c_contig = src1.is_c_contiguous(); - bool is_src2_c_contig = src2.is_c_contiguous(); - bool is_dst_c_contig = dst.is_c_contiguous(); - - bool all_c_contig = - (is_src1_c_contig && is_src2_c_contig && is_dst_c_contig); - if (!all_c_contig) { - return false; - } - - // MKL function is not defined for the type - if (dispatch_vector[src1_typeid] == nullptr) { - return false; - } - return true; -} - template bool need_to_call_unary_ufunc(sycl::queue &exec_q, dpctl::tensor::usm_ndarray &src, @@ -445,7 +56,7 @@ bool need_to_call_unary_ufunc(sycl::queue &exec_q, int src_typenum = src.get_typenum(); int dst_typenum = dst.get_typenum(); - auto array_types = dpctl_td_ns::usm_ndarray_types(); + auto array_types = td_ns::usm_ndarray_types(); int src_typeid = array_types.typenum_to_lookup_id(src_typenum); int dst_typeid = array_types.typenum_to_lookup_id(dst_typenum); @@ -540,7 +151,7 @@ bool need_to_call_binary_ufunc(sycl::queue &exec_q, int src2_typenum = src2.get_typenum(); int dst_typenum = dst.get_typenum(); - auto array_types = dpctl_td_ns::usm_ndarray_types(); + auto array_types = td_ns::usm_ndarray_types(); int src1_typeid = array_types.typenum_to_lookup_id(src1_typenum); int src2_typeid = array_types.typenum_to_lookup_id(src2_typenum); int dst_typeid = array_types.typenum_to_lookup_id(dst_typenum); @@ -665,10 +276,10 @@ bool need_to_call_binary_ufunc(sycl::queue &exec_q, \ static void populate_dispatch_vectors(void) \ { \ - vm_ext::init_ufunc_dispatch_vector( \ + py_internal::init_ufunc_dispatch_vector( \ output_typeid_vector); \ - vm_ext::init_ufunc_dispatch_vector( \ + py_internal::init_ufunc_dispatch_vector( \ contig_dispatch_vector); \ }; @@ -706,30 +317,30 @@ bool need_to_call_binary_ufunc(sycl::queue &exec_q, \ static void populate_dispatch_tables(void) \ { \ - vm_ext::init_ufunc_dispatch_table( \ + py_internal::init_ufunc_dispatch_table( \ output_typeid_vector); \ - vm_ext::init_ufunc_dispatch_table( \ + 
py_internal::init_ufunc_dispatch_table( \ contig_dispatch_vector); \ }; template typename factoryT, - int _num_types = dpctl_td_ns::num_types> + int _num_types = td_ns::num_types> void init_ufunc_dispatch_vector(dispatchT dispatch_vector[]) { - dpctl_td_ns::DispatchVectorBuilder dvb; + td_ns::DispatchVectorBuilder dvb; dvb.populate_dispatch_vector(dispatch_vector); } template typename factoryT, - int _num_types = dpctl_td_ns::num_types> + int _num_types = td_ns::num_types> void init_ufunc_dispatch_table(dispatchT dispatch_table[][_num_types]) { - dpctl_td_ns::DispatchTableBuilder dtb; + td_ns::DispatchTableBuilder dtb; dtb.populate_dispatch_table(dispatch_table); } -} // namespace dpnp::backend::ext::vm +} // namespace dpnp::extensions::vm::py_internal diff --git a/dpnp/backend/extensions/vm/conj.cpp b/dpnp/backend/extensions/vm/conj.cpp index 6daabd71b72a..20d247feb51a 100644 --- a/dpnp/backend/extensions/vm/conj.cpp +++ b/dpnp/backend/extensions/vm/conj.cpp @@ -48,7 +48,6 @@ namespace py = pybind11; namespace py_int = dpnp::extensions::py_internal; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; -namespace vm_ext = dpnp::backend::ext::vm; namespace impl { @@ -126,7 +125,7 @@ void init_conj(py::module_ m) auto conj_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst) { - return vm_ext::need_to_call_unary_ufunc( + return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; m.def("_mkl_conj_to_call", conj_need_to_call_pyapi, diff --git a/dpnp/backend/extensions/vm/cos.cpp b/dpnp/backend/extensions/vm/cos.cpp index c4e9e1208990..1b4f7bde8233 100644 --- a/dpnp/backend/extensions/vm/cos.cpp +++ b/dpnp/backend/extensions/vm/cos.cpp @@ -48,7 +48,6 @@ namespace py = pybind11; namespace py_int = dpnp::extensions::py_internal; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; -namespace vm_ext = dpnp::backend::ext::vm; namespace impl { @@ -128,7 +127,7 @@ void init_cos(py::module_ m) auto cos_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst) { - return vm_ext::need_to_call_unary_ufunc( + return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; m.def("_mkl_cos_to_call", cos_need_to_call_pyapi, diff --git a/dpnp/backend/extensions/vm/cosh.cpp b/dpnp/backend/extensions/vm/cosh.cpp index 61563a560dda..ad7abeb5b142 100644 --- a/dpnp/backend/extensions/vm/cosh.cpp +++ b/dpnp/backend/extensions/vm/cosh.cpp @@ -48,7 +48,6 @@ namespace py = pybind11; namespace py_int = dpnp::extensions::py_internal; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; -namespace vm_ext = dpnp::backend::ext::vm; namespace impl { @@ -128,7 +127,7 @@ void init_cosh(py::module_ m) auto cosh_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst) { - return vm_ext::need_to_call_unary_ufunc( + return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; m.def("_mkl_cosh_to_call", cosh_need_to_call_pyapi, diff --git a/dpnp/backend/extensions/vm/div.cpp b/dpnp/backend/extensions/vm/div.cpp index eb6b6519ed93..e1cfc779733c 100644 --- a/dpnp/backend/extensions/vm/div.cpp +++ b/dpnp/backend/extensions/vm/div.cpp @@ -48,7 +48,6 @@ namespace py = pybind11; namespace py_int = dpnp::extensions::py_internal; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; 
-namespace vm_ext = dpnp::backend::ext::vm; namespace impl { @@ -158,9 +157,9 @@ void init_div(py::module_ m) auto div_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src1, arrayT src2, arrayT dst) { - return vm_ext::need_to_call_binary_ufunc(exec_q, src1, src2, dst, - output_typeid_vector, - contig_dispatch_vector); + return py_internal::need_to_call_binary_ufunc(exec_q, src1, src2, dst, + output_typeid_vector, + contig_dispatch_vector); }; m.def("_mkl_div_to_call", div_need_to_call_pyapi, "Check input arguments to answer if `div` function from " diff --git a/dpnp/backend/extensions/vm/exp.cpp b/dpnp/backend/extensions/vm/exp.cpp index dba1a71ee1d0..9fefe4a1f6f0 100644 --- a/dpnp/backend/extensions/vm/exp.cpp +++ b/dpnp/backend/extensions/vm/exp.cpp @@ -48,7 +48,6 @@ namespace py = pybind11; namespace py_int = dpnp::extensions::py_internal; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; -namespace vm_ext = dpnp::backend::ext::vm; namespace impl { @@ -128,7 +127,7 @@ void init_exp(py::module_ m) auto exp_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst) { - return vm_ext::need_to_call_unary_ufunc( + return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; m.def("_mkl_exp_to_call", exp_need_to_call_pyapi, diff --git a/dpnp/backend/extensions/vm/exp2.cpp b/dpnp/backend/extensions/vm/exp2.cpp index 3d79e2b28e1b..5a7ed87bfd42 100644 --- a/dpnp/backend/extensions/vm/exp2.cpp +++ b/dpnp/backend/extensions/vm/exp2.cpp @@ -48,7 +48,6 @@ namespace py = pybind11; namespace py_int = dpnp::extensions::py_internal; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; -namespace vm_ext = dpnp::backend::ext::vm; namespace impl { @@ -126,7 +125,7 @@ void init_exp2(py::module_ m) auto exp2_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst) { - return vm_ext::need_to_call_unary_ufunc( + return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; m.def("_mkl_exp2_to_call", exp2_need_to_call_pyapi, diff --git a/dpnp/backend/extensions/vm/expm1.cpp b/dpnp/backend/extensions/vm/expm1.cpp index 535a160f5123..9b20bcf0ee98 100644 --- a/dpnp/backend/extensions/vm/expm1.cpp +++ b/dpnp/backend/extensions/vm/expm1.cpp @@ -48,7 +48,6 @@ namespace py = pybind11; namespace py_int = dpnp::extensions::py_internal; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; -namespace vm_ext = dpnp::backend::ext::vm; namespace impl { @@ -126,7 +125,7 @@ void init_expm1(py::module_ m) auto expm1_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst) { - return vm_ext::need_to_call_unary_ufunc( + return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; m.def("_mkl_expm1_to_call", expm1_need_to_call_pyapi, diff --git a/dpnp/backend/extensions/vm/floor.cpp b/dpnp/backend/extensions/vm/floor.cpp index 21d16c05c361..ddd730d61988 100644 --- a/dpnp/backend/extensions/vm/floor.cpp +++ b/dpnp/backend/extensions/vm/floor.cpp @@ -48,7 +48,6 @@ namespace py = pybind11; namespace py_int = dpnp::extensions::py_internal; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; -namespace vm_ext = dpnp::backend::ext::vm; namespace impl { @@ -126,7 +125,7 @@ void init_floor(py::module_ m) auto floor_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst) { - 
return vm_ext::need_to_call_unary_ufunc( + return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; m.def("_mkl_floor_to_call", floor_need_to_call_pyapi, diff --git a/dpnp/backend/extensions/vm/hypot.cpp b/dpnp/backend/extensions/vm/hypot.cpp index 5b3a8275c7ef..d7a093334bac 100644 --- a/dpnp/backend/extensions/vm/hypot.cpp +++ b/dpnp/backend/extensions/vm/hypot.cpp @@ -48,7 +48,6 @@ namespace py = pybind11; namespace py_int = dpnp::extensions::py_internal; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; -namespace vm_ext = dpnp::backend::ext::vm; namespace impl { @@ -147,9 +146,9 @@ void init_hypot(py::module_ m) auto hypot_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src1, arrayT src2, arrayT dst) { - return vm_ext::need_to_call_binary_ufunc(exec_q, src1, src2, dst, - output_typeid_vector, - contig_dispatch_vector); + return py_internal::need_to_call_binary_ufunc(exec_q, src1, src2, dst, + output_typeid_vector, + contig_dispatch_vector); }; m.def("_mkl_hypot_to_call", hypot_need_to_call_pyapi, "Check input arguments to answer if `hypot` function from " diff --git a/dpnp/backend/extensions/vm/ln.cpp b/dpnp/backend/extensions/vm/ln.cpp index 3d4d2997986a..70333142b67e 100644 --- a/dpnp/backend/extensions/vm/ln.cpp +++ b/dpnp/backend/extensions/vm/ln.cpp @@ -48,7 +48,6 @@ namespace py = pybind11; namespace py_int = dpnp::extensions::py_internal; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; -namespace vm_ext = dpnp::backend::ext::vm; namespace impl { @@ -128,7 +127,7 @@ void init_ln(py::module_ m) auto ln_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst) { - return vm_ext::need_to_call_unary_ufunc( + return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; m.def("_mkl_ln_to_call", ln_need_to_call_pyapi, diff --git a/dpnp/backend/extensions/vm/log10.cpp b/dpnp/backend/extensions/vm/log10.cpp index c95c64bcc470..1fcd6fce6474 100644 --- a/dpnp/backend/extensions/vm/log10.cpp +++ b/dpnp/backend/extensions/vm/log10.cpp @@ -48,7 +48,6 @@ namespace py = pybind11; namespace py_int = dpnp::extensions::py_internal; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; -namespace vm_ext = dpnp::backend::ext::vm; namespace impl { @@ -128,7 +127,7 @@ void init_log10(py::module_ m) auto log10_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst) { - return vm_ext::need_to_call_unary_ufunc( + return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; m.def("_mkl_log10_to_call", log10_need_to_call_pyapi, diff --git a/dpnp/backend/extensions/vm/log1p.cpp b/dpnp/backend/extensions/vm/log1p.cpp index a1bdb3bb0258..2a6ae4a0d8a3 100644 --- a/dpnp/backend/extensions/vm/log1p.cpp +++ b/dpnp/backend/extensions/vm/log1p.cpp @@ -48,7 +48,6 @@ namespace py = pybind11; namespace py_int = dpnp::extensions::py_internal; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; -namespace vm_ext = dpnp::backend::ext::vm; namespace impl { @@ -126,7 +125,7 @@ void init_log1p(py::module_ m) auto log1p_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst) { - return vm_ext::need_to_call_unary_ufunc( + return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; m.def("_mkl_log1p_to_call", 
log1p_need_to_call_pyapi, diff --git a/dpnp/backend/extensions/vm/log2.cpp b/dpnp/backend/extensions/vm/log2.cpp index 54021c8b0919..24ae2210573c 100644 --- a/dpnp/backend/extensions/vm/log2.cpp +++ b/dpnp/backend/extensions/vm/log2.cpp @@ -48,7 +48,6 @@ namespace py = pybind11; namespace py_int = dpnp::extensions::py_internal; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; -namespace vm_ext = dpnp::backend::ext::vm; namespace impl { @@ -126,7 +125,7 @@ void init_log2(py::module_ m) auto log2_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst) { - return vm_ext::need_to_call_unary_ufunc( + return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; m.def("_mkl_log2_to_call", log2_need_to_call_pyapi, diff --git a/dpnp/backend/extensions/vm/mul.cpp b/dpnp/backend/extensions/vm/mul.cpp index 9750f96bfad0..0bd38c881e60 100644 --- a/dpnp/backend/extensions/vm/mul.cpp +++ b/dpnp/backend/extensions/vm/mul.cpp @@ -48,7 +48,6 @@ namespace py = pybind11; namespace py_int = dpnp::extensions::py_internal; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; -namespace vm_ext = dpnp::backend::ext::vm; namespace impl { @@ -158,9 +157,9 @@ void init_mul(py::module_ m) auto mul_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src1, arrayT src2, arrayT dst) { - return vm_ext::need_to_call_binary_ufunc(exec_q, src1, src2, dst, - output_typeid_vector, - contig_dispatch_vector); + return py_internal::need_to_call_binary_ufunc(exec_q, src1, src2, dst, + output_typeid_vector, + contig_dispatch_vector); }; m.def("_mkl_mul_to_call", mul_need_to_call_pyapi, "Check input arguments to answer if `mul` function from " diff --git a/dpnp/backend/extensions/vm/pow.cpp b/dpnp/backend/extensions/vm/pow.cpp index 4ce45a77d239..f0d4948e5dba 100644 --- a/dpnp/backend/extensions/vm/pow.cpp +++ b/dpnp/backend/extensions/vm/pow.cpp @@ -48,7 +48,6 @@ namespace py = pybind11; namespace py_int = dpnp::extensions::py_internal; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; -namespace vm_ext = dpnp::backend::ext::vm; namespace impl { @@ -158,9 +157,9 @@ void init_pow(py::module_ m) auto pow_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src1, arrayT src2, arrayT dst) { - return vm_ext::need_to_call_binary_ufunc(exec_q, src1, src2, dst, - output_typeid_vector, - contig_dispatch_vector); + return py_internal::need_to_call_binary_ufunc(exec_q, src1, src2, dst, + output_typeid_vector, + contig_dispatch_vector); }; m.def("_mkl_pow_to_call", pow_need_to_call_pyapi, "Check input arguments to answer if `pow` function from " diff --git a/dpnp/backend/extensions/vm/rint.cpp b/dpnp/backend/extensions/vm/rint.cpp index 6a65352bc01f..e33a866f6cab 100644 --- a/dpnp/backend/extensions/vm/rint.cpp +++ b/dpnp/backend/extensions/vm/rint.cpp @@ -48,7 +48,6 @@ namespace py = pybind11; namespace py_int = dpnp::extensions::py_internal; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; -namespace vm_ext = dpnp::backend::ext::vm; namespace impl { @@ -126,7 +125,7 @@ void init_rint(py::module_ m) auto rint_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst) { - return vm_ext::need_to_call_unary_ufunc( + return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; m.def("_mkl_round_to_call", rint_need_to_call_pyapi, diff --git 
a/dpnp/backend/extensions/vm/sin.cpp b/dpnp/backend/extensions/vm/sin.cpp index 39abddb9c7f0..f3aff6b21fcd 100644 --- a/dpnp/backend/extensions/vm/sin.cpp +++ b/dpnp/backend/extensions/vm/sin.cpp @@ -48,7 +48,6 @@ namespace py = pybind11; namespace py_int = dpnp::extensions::py_internal; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; -namespace vm_ext = dpnp::backend::ext::vm; namespace impl { @@ -128,7 +127,7 @@ void init_sin(py::module_ m) auto sin_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst) { - return vm_ext::need_to_call_unary_ufunc( + return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; m.def("_mkl_sin_to_call", sin_need_to_call_pyapi, diff --git a/dpnp/backend/extensions/vm/sinh.cpp b/dpnp/backend/extensions/vm/sinh.cpp index 367c54e9b8cd..708b2fc97362 100644 --- a/dpnp/backend/extensions/vm/sinh.cpp +++ b/dpnp/backend/extensions/vm/sinh.cpp @@ -48,7 +48,6 @@ namespace py = pybind11; namespace py_int = dpnp::extensions::py_internal; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; -namespace vm_ext = dpnp::backend::ext::vm; namespace impl { @@ -128,7 +127,7 @@ void init_sinh(py::module_ m) auto sinh_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst) { - return vm_ext::need_to_call_unary_ufunc( + return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; m.def("_mkl_sinh_to_call", sinh_need_to_call_pyapi, diff --git a/dpnp/backend/extensions/vm/sqr.cpp b/dpnp/backend/extensions/vm/sqr.cpp index 4e763a89f897..da603d320167 100644 --- a/dpnp/backend/extensions/vm/sqr.cpp +++ b/dpnp/backend/extensions/vm/sqr.cpp @@ -48,7 +48,6 @@ namespace py = pybind11; namespace py_int = dpnp::extensions::py_internal; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; -namespace vm_ext = dpnp::backend::ext::vm; namespace impl { @@ -126,7 +125,7 @@ void init_sqr(py::module_ m) auto sqr_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst) { - return vm_ext::need_to_call_unary_ufunc( + return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; m.def("_mkl_sqr_to_call", sqr_need_to_call_pyapi, diff --git a/dpnp/backend/extensions/vm/sqrt.cpp b/dpnp/backend/extensions/vm/sqrt.cpp index c25c6304ae1e..40773ee8d645 100644 --- a/dpnp/backend/extensions/vm/sqrt.cpp +++ b/dpnp/backend/extensions/vm/sqrt.cpp @@ -48,7 +48,6 @@ namespace py = pybind11; namespace py_int = dpnp::extensions::py_internal; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; -namespace vm_ext = dpnp::backend::ext::vm; namespace impl { @@ -129,7 +128,7 @@ void init_sqrt(py::module_ m) auto sqrt_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst) { - return vm_ext::need_to_call_unary_ufunc( + return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; m.def("_mkl_sqrt_to_call", sqrt_need_to_call_pyapi, diff --git a/dpnp/backend/extensions/vm/sub.cpp b/dpnp/backend/extensions/vm/sub.cpp index 4f5197469bb0..46add7e75d78 100644 --- a/dpnp/backend/extensions/vm/sub.cpp +++ b/dpnp/backend/extensions/vm/sub.cpp @@ -48,7 +48,6 @@ namespace py = pybind11; namespace py_int = dpnp::extensions::py_internal; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = 
dpctl::tensor::type_utils; -namespace vm_ext = dpnp::backend::ext::vm; namespace impl { @@ -158,9 +157,9 @@ void init_sub(py::module_ m) auto sub_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src1, arrayT src2, arrayT dst) { - return vm_ext::need_to_call_binary_ufunc(exec_q, src1, src2, dst, - output_typeid_vector, - contig_dispatch_vector); + return py_internal::need_to_call_binary_ufunc(exec_q, src1, src2, dst, + output_typeid_vector, + contig_dispatch_vector); }; m.def("_mkl_sub_to_call", sub_need_to_call_pyapi, "Check input arguments to answer if `sub` function from " diff --git a/dpnp/backend/extensions/vm/tan.cpp b/dpnp/backend/extensions/vm/tan.cpp index b21c49a4d995..72ccad0d5192 100644 --- a/dpnp/backend/extensions/vm/tan.cpp +++ b/dpnp/backend/extensions/vm/tan.cpp @@ -48,7 +48,6 @@ namespace py = pybind11; namespace py_int = dpnp::extensions::py_internal; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; -namespace vm_ext = dpnp::backend::ext::vm; namespace impl { @@ -128,7 +127,7 @@ void init_tan(py::module_ m) auto tan_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst) { - return vm_ext::need_to_call_unary_ufunc( + return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; m.def("_mkl_tan_to_call", tan_need_to_call_pyapi, diff --git a/dpnp/backend/extensions/vm/tanh.cpp b/dpnp/backend/extensions/vm/tanh.cpp index 4a65b022c44f..933d507758f4 100644 --- a/dpnp/backend/extensions/vm/tanh.cpp +++ b/dpnp/backend/extensions/vm/tanh.cpp @@ -48,7 +48,6 @@ namespace py = pybind11; namespace py_int = dpnp::extensions::py_internal; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; -namespace vm_ext = dpnp::backend::ext::vm; namespace impl { @@ -128,7 +127,7 @@ void init_tanh(py::module_ m) auto tanh_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst) { - return vm_ext::need_to_call_unary_ufunc( + return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; m.def("_mkl_tanh_to_call", tanh_need_to_call_pyapi, diff --git a/dpnp/backend/extensions/vm/trunc.cpp b/dpnp/backend/extensions/vm/trunc.cpp index cb9fe25ccc3e..5ad79a17c23e 100644 --- a/dpnp/backend/extensions/vm/trunc.cpp +++ b/dpnp/backend/extensions/vm/trunc.cpp @@ -48,7 +48,6 @@ namespace py = pybind11; namespace py_int = dpnp::extensions::py_internal; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; -namespace vm_ext = dpnp::backend::ext::vm; namespace impl { @@ -126,7 +125,7 @@ void init_trunc(py::module_ m) auto trunc_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst) { - return vm_ext::need_to_call_unary_ufunc( + return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; m.def("_mkl_trunc_to_call", trunc_need_to_call_pyapi, diff --git a/dpnp/backend/extensions/vm/vm_py.cpp b/dpnp/backend/extensions/vm/vm_py.cpp index 0e6030a4c706..791a8f6d6561 100644 --- a/dpnp/backend/extensions/vm/vm_py.cpp +++ b/dpnp/backend/extensions/vm/vm_py.cpp @@ -38,7 +38,6 @@ #include "atanh.hpp" #include "cbrt.hpp" #include "ceil.hpp" -#include "common.hpp" #include "conj.hpp" #include "cos.hpp" #include "cosh.hpp" From c565600d958ea762ef3139613e1db32ff8544c59 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 12 Jun 2024 11:42:40 +0200 Subject: [PATCH 21/35] Use passing by reference in unary and 
binary funcs --- dpnp/backend/extensions/vm/abs.cpp | 8 ++++---- dpnp/backend/extensions/vm/acos.cpp | 8 ++++---- dpnp/backend/extensions/vm/acosh.cpp | 8 ++++---- dpnp/backend/extensions/vm/add.cpp | 9 +++++---- dpnp/backend/extensions/vm/asin.cpp | 8 ++++---- dpnp/backend/extensions/vm/asinh.cpp | 8 ++++---- dpnp/backend/extensions/vm/atan.cpp | 8 ++++---- dpnp/backend/extensions/vm/atan2.cpp | 9 +++++---- dpnp/backend/extensions/vm/atanh.cpp | 8 ++++---- dpnp/backend/extensions/vm/cbrt.cpp | 8 ++++---- dpnp/backend/extensions/vm/ceil.cpp | 8 ++++---- dpnp/backend/extensions/vm/common.hpp | 10 +++++----- dpnp/backend/extensions/vm/conj.cpp | 8 ++++---- dpnp/backend/extensions/vm/cos.cpp | 8 ++++---- dpnp/backend/extensions/vm/cosh.cpp | 8 ++++---- dpnp/backend/extensions/vm/div.cpp | 9 +++++---- dpnp/backend/extensions/vm/exp.cpp | 8 ++++---- dpnp/backend/extensions/vm/exp2.cpp | 8 ++++---- dpnp/backend/extensions/vm/expm1.cpp | 8 ++++---- dpnp/backend/extensions/vm/floor.cpp | 8 ++++---- dpnp/backend/extensions/vm/hypot.cpp | 9 +++++---- dpnp/backend/extensions/vm/ln.cpp | 8 ++++---- dpnp/backend/extensions/vm/log10.cpp | 8 ++++---- dpnp/backend/extensions/vm/log1p.cpp | 8 ++++---- dpnp/backend/extensions/vm/log2.cpp | 8 ++++---- dpnp/backend/extensions/vm/mul.cpp | 9 +++++---- dpnp/backend/extensions/vm/pow.cpp | 9 +++++---- dpnp/backend/extensions/vm/rint.cpp | 8 ++++---- dpnp/backend/extensions/vm/sin.cpp | 8 ++++---- dpnp/backend/extensions/vm/sinh.cpp | 8 ++++---- dpnp/backend/extensions/vm/sqr.cpp | 8 ++++---- dpnp/backend/extensions/vm/sqrt.cpp | 8 ++++---- dpnp/backend/extensions/vm/sub.cpp | 9 +++++---- dpnp/backend/extensions/vm/tan.cpp | 8 ++++---- dpnp/backend/extensions/vm/tanh.cpp | 8 ++++---- dpnp/backend/extensions/vm/trunc.cpp | 8 ++++---- 36 files changed, 152 insertions(+), 145 deletions(-) diff --git a/dpnp/backend/extensions/vm/abs.cpp b/dpnp/backend/extensions/vm/abs.cpp index 5052c6459b47..7eb7086de85e 100644 --- a/dpnp/backend/extensions/vm/abs.cpp +++ b/dpnp/backend/extensions/vm/abs.cpp @@ -111,8 +111,8 @@ void init_abs(py::module_ m) using impl::contig_dispatch_vector; using impl::output_typeid_vector; - auto abs_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { + auto abs_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst, const event_vecT &depends = {}) { return py_int::py_unary_ufunc( src, dst, exec_q, depends, output_typeid_vector, contig_dispatch_vector, @@ -125,8 +125,8 @@ void init_abs(py::module_ m) py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), py::arg("depends") = py::list()); - auto abs_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { + auto abs_need_to_call_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst) { return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; diff --git a/dpnp/backend/extensions/vm/acos.cpp b/dpnp/backend/extensions/vm/acos.cpp index d7b41cb15e5d..ab744bf99c44 100644 --- a/dpnp/backend/extensions/vm/acos.cpp +++ b/dpnp/backend/extensions/vm/acos.cpp @@ -111,8 +111,8 @@ void init_acos(py::module_ m) using impl::contig_dispatch_vector; using impl::output_typeid_vector; - auto acos_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { + auto acos_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst, const event_vecT &depends = {}) { return py_int::py_unary_ufunc( src, dst, exec_q, depends, 
output_typeid_vector, contig_dispatch_vector, @@ -125,8 +125,8 @@ void init_acos(py::module_ m) py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), py::arg("depends") = py::list()); - auto acos_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { + auto acos_need_to_call_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst) { return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; diff --git a/dpnp/backend/extensions/vm/acosh.cpp b/dpnp/backend/extensions/vm/acosh.cpp index e44ae8b62c9f..2cab39313d20 100644 --- a/dpnp/backend/extensions/vm/acosh.cpp +++ b/dpnp/backend/extensions/vm/acosh.cpp @@ -111,8 +111,8 @@ void init_acosh(py::module_ m) using impl::contig_dispatch_vector; using impl::output_typeid_vector; - auto acosh_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { + auto acosh_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst, const event_vecT &depends = {}) { return py_int::py_unary_ufunc( src, dst, exec_q, depends, output_typeid_vector, contig_dispatch_vector, @@ -125,8 +125,8 @@ void init_acosh(py::module_ m) py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), py::arg("depends") = py::list()); - auto acosh_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { + auto acosh_need_to_call_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst) { return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; diff --git a/dpnp/backend/extensions/vm/add.cpp b/dpnp/backend/extensions/vm/add.cpp index ea03cc9bb825..c43f07bbcde1 100644 --- a/dpnp/backend/extensions/vm/add.cpp +++ b/dpnp/backend/extensions/vm/add.cpp @@ -133,8 +133,9 @@ void init_add(py::module_ m) using impl::contig_dispatch_vector; using impl::output_typeid_vector; - auto add_pyapi = [&](sycl::queue exec_q, arrayT src1, arrayT src2, - arrayT dst, const event_vecT &depends = {}) { + auto add_pyapi = [&](sycl::queue &exec_q, const arrayT &src1, + const arrayT &src2, const arrayT &dst, + const event_vecT &depends = {}) { return py_int::py_binary_ufunc( src1, src2, dst, exec_q, depends, output_typeid_vector, contig_dispatch_vector, @@ -155,8 +156,8 @@ void init_add(py::module_ m) py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), py::arg("dst"), py::arg("depends") = py::list()); - auto add_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src1, - arrayT src2, arrayT dst) { + auto add_need_to_call_pyapi = [&](sycl::queue &exec_q, const arrayT &src1, + const arrayT &src2, const arrayT &dst) { return py_internal::need_to_call_binary_ufunc(exec_q, src1, src2, dst, output_typeid_vector, contig_dispatch_vector); diff --git a/dpnp/backend/extensions/vm/asin.cpp b/dpnp/backend/extensions/vm/asin.cpp index e347f69eb4e9..afbb868e8cca 100644 --- a/dpnp/backend/extensions/vm/asin.cpp +++ b/dpnp/backend/extensions/vm/asin.cpp @@ -111,8 +111,8 @@ void init_asin(py::module_ m) using impl::contig_dispatch_vector; using impl::output_typeid_vector; - auto asin_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { + auto asin_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst, const event_vecT &depends = {}) { return py_int::py_unary_ufunc( src, dst, exec_q, depends, output_typeid_vector, contig_dispatch_vector, @@ -125,8 +125,8 @@ void init_asin(py::module_ m) py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), 
py::arg("depends") = py::list()); - auto asin_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { + auto asin_need_to_call_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst) { return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; diff --git a/dpnp/backend/extensions/vm/asinh.cpp b/dpnp/backend/extensions/vm/asinh.cpp index 46ab7d6e03d8..0f70c3cb5010 100644 --- a/dpnp/backend/extensions/vm/asinh.cpp +++ b/dpnp/backend/extensions/vm/asinh.cpp @@ -111,8 +111,8 @@ void init_asinh(py::module_ m) using impl::contig_dispatch_vector; using impl::output_typeid_vector; - auto asinh_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { + auto asinh_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst, const event_vecT &depends = {}) { return py_int::py_unary_ufunc( src, dst, exec_q, depends, output_typeid_vector, contig_dispatch_vector, @@ -125,8 +125,8 @@ void init_asinh(py::module_ m) py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), py::arg("depends") = py::list()); - auto asinh_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { + auto asinh_need_to_call_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst) { return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; diff --git a/dpnp/backend/extensions/vm/atan.cpp b/dpnp/backend/extensions/vm/atan.cpp index 76f367981bf9..59f7064ef156 100644 --- a/dpnp/backend/extensions/vm/atan.cpp +++ b/dpnp/backend/extensions/vm/atan.cpp @@ -111,8 +111,8 @@ void init_atan(py::module_ m) using impl::contig_dispatch_vector; using impl::output_typeid_vector; - auto atan_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { + auto atan_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst, const event_vecT &depends = {}) { return py_int::py_unary_ufunc( src, dst, exec_q, depends, output_typeid_vector, contig_dispatch_vector, @@ -125,8 +125,8 @@ void init_atan(py::module_ m) py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), py::arg("depends") = py::list()); - auto atan_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { + auto atan_need_to_call_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst) { return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; diff --git a/dpnp/backend/extensions/vm/atan2.cpp b/dpnp/backend/extensions/vm/atan2.cpp index 3da53a8b48af..30bb59c9c422 100644 --- a/dpnp/backend/extensions/vm/atan2.cpp +++ b/dpnp/backend/extensions/vm/atan2.cpp @@ -123,8 +123,9 @@ void init_atan2(py::module_ m) using impl::contig_dispatch_vector; using impl::output_typeid_vector; - auto atan2_pyapi = [&](sycl::queue exec_q, arrayT src1, arrayT src2, - arrayT dst, const event_vecT &depends = {}) { + auto atan2_pyapi = [&](sycl::queue &exec_q, const arrayT &src1, + const arrayT &src2, const arrayT &dst, + const event_vecT &depends = {}) { return py_int::py_binary_ufunc( src1, src2, dst, exec_q, depends, output_typeid_vector, contig_dispatch_vector, @@ -144,8 +145,8 @@ void init_atan2(py::module_ m) py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), py::arg("dst"), py::arg("depends") = py::list()); - auto atan2_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src1, - arrayT src2, arrayT dst) { + auto atan2_need_to_call_pyapi = [&](sycl::queue 
&exec_q, const arrayT &src1, + const arrayT &src2, const arrayT &dst) { return py_internal::need_to_call_binary_ufunc(exec_q, src1, src2, dst, output_typeid_vector, contig_dispatch_vector); diff --git a/dpnp/backend/extensions/vm/atanh.cpp b/dpnp/backend/extensions/vm/atanh.cpp index 1fcaed973052..bd32d25f2a6b 100644 --- a/dpnp/backend/extensions/vm/atanh.cpp +++ b/dpnp/backend/extensions/vm/atanh.cpp @@ -111,8 +111,8 @@ void init_atanh(py::module_ m) using impl::contig_dispatch_vector; using impl::output_typeid_vector; - auto atanh_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { + auto atanh_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst, const event_vecT &depends = {}) { return py_int::py_unary_ufunc( src, dst, exec_q, depends, output_typeid_vector, contig_dispatch_vector, @@ -125,8 +125,8 @@ void init_atanh(py::module_ m) py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), py::arg("depends") = py::list()); - auto atanh_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { + auto atanh_need_to_call_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst) { return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; diff --git a/dpnp/backend/extensions/vm/cbrt.cpp b/dpnp/backend/extensions/vm/cbrt.cpp index 34e0d786caf3..88bc82824180 100644 --- a/dpnp/backend/extensions/vm/cbrt.cpp +++ b/dpnp/backend/extensions/vm/cbrt.cpp @@ -109,8 +109,8 @@ void init_cbrt(py::module_ m) using impl::contig_dispatch_vector; using impl::output_typeid_vector; - auto cbrt_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { + auto cbrt_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst, const event_vecT &depends = {}) { return py_int::py_unary_ufunc( src, dst, exec_q, depends, output_typeid_vector, contig_dispatch_vector, @@ -123,8 +123,8 @@ void init_cbrt(py::module_ m) py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), py::arg("depends") = py::list()); - auto cbrt_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { + auto cbrt_need_to_call_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst) { return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; diff --git a/dpnp/backend/extensions/vm/ceil.cpp b/dpnp/backend/extensions/vm/ceil.cpp index d51fe6cc442a..14e7234a54c9 100644 --- a/dpnp/backend/extensions/vm/ceil.cpp +++ b/dpnp/backend/extensions/vm/ceil.cpp @@ -109,8 +109,8 @@ void init_ceil(py::module_ m) using impl::contig_dispatch_vector; using impl::output_typeid_vector; - auto ceil_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { + auto ceil_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst, const event_vecT &depends = {}) { return py_int::py_unary_ufunc( src, dst, exec_q, depends, output_typeid_vector, contig_dispatch_vector, @@ -123,8 +123,8 @@ void init_ceil(py::module_ m) py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), py::arg("depends") = py::list()); - auto ceil_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { + auto ceil_need_to_call_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst) { return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; diff --git a/dpnp/backend/extensions/vm/common.hpp 
b/dpnp/backend/extensions/vm/common.hpp index 3db084740615..74e9f81fa0f7 100644 --- a/dpnp/backend/extensions/vm/common.hpp +++ b/dpnp/backend/extensions/vm/common.hpp @@ -47,8 +47,8 @@ namespace dpnp::extensions::vm::py_internal { template bool need_to_call_unary_ufunc(sycl::queue &exec_q, - dpctl::tensor::usm_ndarray &src, - dpctl::tensor::usm_ndarray &dst, + const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, const output_typesT &output_type_vec, const contig_dispatchT &contig_dispatch_vector) { @@ -140,9 +140,9 @@ bool need_to_call_unary_ufunc(sycl::queue &exec_q, template bool need_to_call_binary_ufunc(sycl::queue &exec_q, - dpctl::tensor::usm_ndarray &src1, - dpctl::tensor::usm_ndarray &src2, - dpctl::tensor::usm_ndarray &dst, + const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, const output_typesT &output_type_table, const contig_dispatchT &contig_dispatch_table) { diff --git a/dpnp/backend/extensions/vm/conj.cpp b/dpnp/backend/extensions/vm/conj.cpp index 20d247feb51a..edfb4384dad0 100644 --- a/dpnp/backend/extensions/vm/conj.cpp +++ b/dpnp/backend/extensions/vm/conj.cpp @@ -109,8 +109,8 @@ void init_conj(py::module_ m) using impl::contig_dispatch_vector; using impl::output_typeid_vector; - auto conj_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { + auto conj_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst, const event_vecT &depends = {}) { return py_int::py_unary_ufunc( src, dst, exec_q, depends, output_typeid_vector, contig_dispatch_vector, @@ -123,8 +123,8 @@ void init_conj(py::module_ m) py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), py::arg("depends") = py::list()); - auto conj_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { + auto conj_need_to_call_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst) { return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; diff --git a/dpnp/backend/extensions/vm/cos.cpp b/dpnp/backend/extensions/vm/cos.cpp index 1b4f7bde8233..e7925cc32987 100644 --- a/dpnp/backend/extensions/vm/cos.cpp +++ b/dpnp/backend/extensions/vm/cos.cpp @@ -111,8 +111,8 @@ void init_cos(py::module_ m) using impl::contig_dispatch_vector; using impl::output_typeid_vector; - auto cos_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { + auto cos_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst, const event_vecT &depends = {}) { return py_int::py_unary_ufunc( src, dst, exec_q, depends, output_typeid_vector, contig_dispatch_vector, @@ -125,8 +125,8 @@ void init_cos(py::module_ m) py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), py::arg("depends") = py::list()); - auto cos_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { + auto cos_need_to_call_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst) { return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; diff --git a/dpnp/backend/extensions/vm/cosh.cpp b/dpnp/backend/extensions/vm/cosh.cpp index ad7abeb5b142..bb883c97c33e 100644 --- a/dpnp/backend/extensions/vm/cosh.cpp +++ b/dpnp/backend/extensions/vm/cosh.cpp @@ -111,8 +111,8 @@ void init_cosh(py::module_ m) using impl::contig_dispatch_vector; using impl::output_typeid_vector; - auto cosh_pyapi = [&](sycl::queue exec_q, arrayT src, 
arrayT dst, - const event_vecT &depends = {}) { + auto cosh_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst, const event_vecT &depends = {}) { return py_int::py_unary_ufunc( src, dst, exec_q, depends, output_typeid_vector, contig_dispatch_vector, @@ -125,8 +125,8 @@ void init_cosh(py::module_ m) py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), py::arg("depends") = py::list()); - auto cosh_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { + auto cosh_need_to_call_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst) { return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; diff --git a/dpnp/backend/extensions/vm/div.cpp b/dpnp/backend/extensions/vm/div.cpp index e1cfc779733c..8cdb547feb4e 100644 --- a/dpnp/backend/extensions/vm/div.cpp +++ b/dpnp/backend/extensions/vm/div.cpp @@ -133,8 +133,9 @@ void init_div(py::module_ m) using impl::contig_dispatch_vector; using impl::output_typeid_vector; - auto div_pyapi = [&](sycl::queue exec_q, arrayT src1, arrayT src2, - arrayT dst, const event_vecT &depends = {}) { + auto div_pyapi = [&](sycl::queue &exec_q, const arrayT &src1, + const arrayT &src2, const arrayT &dst, + const event_vecT &depends = {}) { return py_int::py_binary_ufunc( src1, src2, dst, exec_q, depends, output_typeid_vector, contig_dispatch_vector, @@ -155,8 +156,8 @@ void init_div(py::module_ m) py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), py::arg("dst"), py::arg("depends") = py::list()); - auto div_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src1, - arrayT src2, arrayT dst) { + auto div_need_to_call_pyapi = [&](sycl::queue &exec_q, const arrayT &src1, + const arrayT &src2, const arrayT &dst) { return py_internal::need_to_call_binary_ufunc(exec_q, src1, src2, dst, output_typeid_vector, contig_dispatch_vector); diff --git a/dpnp/backend/extensions/vm/exp.cpp b/dpnp/backend/extensions/vm/exp.cpp index 9fefe4a1f6f0..b7f8d4422d18 100644 --- a/dpnp/backend/extensions/vm/exp.cpp +++ b/dpnp/backend/extensions/vm/exp.cpp @@ -111,8 +111,8 @@ void init_exp(py::module_ m) using impl::contig_dispatch_vector; using impl::output_typeid_vector; - auto exp_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { + auto exp_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst, const event_vecT &depends = {}) { return py_int::py_unary_ufunc( src, dst, exec_q, depends, output_typeid_vector, contig_dispatch_vector, @@ -125,8 +125,8 @@ void init_exp(py::module_ m) py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), py::arg("depends") = py::list()); - auto exp_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { + auto exp_need_to_call_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst) { return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; diff --git a/dpnp/backend/extensions/vm/exp2.cpp b/dpnp/backend/extensions/vm/exp2.cpp index 5a7ed87bfd42..8b5d7a7c5ff3 100644 --- a/dpnp/backend/extensions/vm/exp2.cpp +++ b/dpnp/backend/extensions/vm/exp2.cpp @@ -109,8 +109,8 @@ void init_exp2(py::module_ m) using impl::contig_dispatch_vector; using impl::output_typeid_vector; - auto exp2_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { + auto exp2_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst, const event_vecT &depends = {}) { return 
py_int::py_unary_ufunc( src, dst, exec_q, depends, output_typeid_vector, contig_dispatch_vector, @@ -123,8 +123,8 @@ void init_exp2(py::module_ m) py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), py::arg("depends") = py::list()); - auto exp2_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { + auto exp2_need_to_call_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst) { return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; diff --git a/dpnp/backend/extensions/vm/expm1.cpp b/dpnp/backend/extensions/vm/expm1.cpp index 9b20bcf0ee98..b27668ba7c48 100644 --- a/dpnp/backend/extensions/vm/expm1.cpp +++ b/dpnp/backend/extensions/vm/expm1.cpp @@ -109,8 +109,8 @@ void init_expm1(py::module_ m) using impl::contig_dispatch_vector; using impl::output_typeid_vector; - auto expm1_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { + auto expm1_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst, const event_vecT &depends = {}) { return py_int::py_unary_ufunc( src, dst, exec_q, depends, output_typeid_vector, contig_dispatch_vector, @@ -123,8 +123,8 @@ void init_expm1(py::module_ m) py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), py::arg("depends") = py::list()); - auto expm1_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { + auto expm1_need_to_call_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst) { return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; diff --git a/dpnp/backend/extensions/vm/floor.cpp b/dpnp/backend/extensions/vm/floor.cpp index ddd730d61988..8a32f40e0ffb 100644 --- a/dpnp/backend/extensions/vm/floor.cpp +++ b/dpnp/backend/extensions/vm/floor.cpp @@ -109,8 +109,8 @@ void init_floor(py::module_ m) using impl::contig_dispatch_vector; using impl::output_typeid_vector; - auto floor_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { + auto floor_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst, const event_vecT &depends = {}) { return py_int::py_unary_ufunc( src, dst, exec_q, depends, output_typeid_vector, contig_dispatch_vector, @@ -123,8 +123,8 @@ void init_floor(py::module_ m) py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), py::arg("depends") = py::list()); - auto floor_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { + auto floor_need_to_call_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst) { return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; diff --git a/dpnp/backend/extensions/vm/hypot.cpp b/dpnp/backend/extensions/vm/hypot.cpp index d7a093334bac..42dd81271111 100644 --- a/dpnp/backend/extensions/vm/hypot.cpp +++ b/dpnp/backend/extensions/vm/hypot.cpp @@ -123,8 +123,9 @@ void init_hypot(py::module_ m) using impl::contig_dispatch_vector; using impl::output_typeid_vector; - auto hypot_pyapi = [&](sycl::queue exec_q, arrayT src1, arrayT src2, - arrayT dst, const event_vecT &depends = {}) { + auto hypot_pyapi = [&](sycl::queue &exec_q, const arrayT &src1, + const arrayT &src2, const arrayT &dst, + const event_vecT &depends = {}) { return py_int::py_binary_ufunc( src1, src2, dst, exec_q, depends, output_typeid_vector, contig_dispatch_vector, @@ -144,8 +145,8 @@ void init_hypot(py::module_ m) py::arg("sycl_queue"), py::arg("src1"), 
py::arg("src2"), py::arg("dst"), py::arg("depends") = py::list()); - auto hypot_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src1, - arrayT src2, arrayT dst) { + auto hypot_need_to_call_pyapi = [&](sycl::queue &exec_q, const arrayT &src1, + const arrayT &src2, const arrayT &dst) { return py_internal::need_to_call_binary_ufunc(exec_q, src1, src2, dst, output_typeid_vector, contig_dispatch_vector); diff --git a/dpnp/backend/extensions/vm/ln.cpp b/dpnp/backend/extensions/vm/ln.cpp index 70333142b67e..2eb321a3777a 100644 --- a/dpnp/backend/extensions/vm/ln.cpp +++ b/dpnp/backend/extensions/vm/ln.cpp @@ -111,8 +111,8 @@ void init_ln(py::module_ m) using impl::contig_dispatch_vector; using impl::output_typeid_vector; - auto ln_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { + auto ln_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst, const event_vecT &depends = {}) { return py_int::py_unary_ufunc( src, dst, exec_q, depends, output_typeid_vector, contig_dispatch_vector, @@ -125,8 +125,8 @@ void init_ln(py::module_ m) py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), py::arg("depends") = py::list()); - auto ln_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { + auto ln_need_to_call_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst) { return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; diff --git a/dpnp/backend/extensions/vm/log10.cpp b/dpnp/backend/extensions/vm/log10.cpp index 1fcd6fce6474..e685e5fce601 100644 --- a/dpnp/backend/extensions/vm/log10.cpp +++ b/dpnp/backend/extensions/vm/log10.cpp @@ -111,8 +111,8 @@ void init_log10(py::module_ m) using impl::contig_dispatch_vector; using impl::output_typeid_vector; - auto log10_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { + auto log10_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst, const event_vecT &depends = {}) { return py_int::py_unary_ufunc( src, dst, exec_q, depends, output_typeid_vector, contig_dispatch_vector, @@ -125,8 +125,8 @@ void init_log10(py::module_ m) py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), py::arg("depends") = py::list()); - auto log10_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { + auto log10_need_to_call_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst) { return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; diff --git a/dpnp/backend/extensions/vm/log1p.cpp b/dpnp/backend/extensions/vm/log1p.cpp index 2a6ae4a0d8a3..2db1491e5ebd 100644 --- a/dpnp/backend/extensions/vm/log1p.cpp +++ b/dpnp/backend/extensions/vm/log1p.cpp @@ -109,8 +109,8 @@ void init_log1p(py::module_ m) using impl::contig_dispatch_vector; using impl::output_typeid_vector; - auto log1p_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { + auto log1p_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst, const event_vecT &depends = {}) { return py_int::py_unary_ufunc( src, dst, exec_q, depends, output_typeid_vector, contig_dispatch_vector, @@ -123,8 +123,8 @@ void init_log1p(py::module_ m) py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), py::arg("depends") = py::list()); - auto log1p_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { + auto log1p_need_to_call_pyapi = [&](sycl::queue &exec_q, const arrayT 
&src, + const arrayT &dst) { return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; diff --git a/dpnp/backend/extensions/vm/log2.cpp b/dpnp/backend/extensions/vm/log2.cpp index 24ae2210573c..a6800185c256 100644 --- a/dpnp/backend/extensions/vm/log2.cpp +++ b/dpnp/backend/extensions/vm/log2.cpp @@ -109,8 +109,8 @@ void init_log2(py::module_ m) using impl::contig_dispatch_vector; using impl::output_typeid_vector; - auto log2_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { + auto log2_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst, const event_vecT &depends = {}) { return py_int::py_unary_ufunc( src, dst, exec_q, depends, output_typeid_vector, contig_dispatch_vector, @@ -123,8 +123,8 @@ void init_log2(py::module_ m) py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), py::arg("depends") = py::list()); - auto log2_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { + auto log2_need_to_call_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst) { return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; diff --git a/dpnp/backend/extensions/vm/mul.cpp b/dpnp/backend/extensions/vm/mul.cpp index 0bd38c881e60..34007fbc07c2 100644 --- a/dpnp/backend/extensions/vm/mul.cpp +++ b/dpnp/backend/extensions/vm/mul.cpp @@ -133,8 +133,9 @@ void init_mul(py::module_ m) using impl::contig_dispatch_vector; using impl::output_typeid_vector; - auto mul_pyapi = [&](sycl::queue exec_q, arrayT src1, arrayT src2, - arrayT dst, const event_vecT &depends = {}) { + auto mul_pyapi = [&](sycl::queue &exec_q, const arrayT &src1, + const arrayT &src2, const arrayT &dst, + const event_vecT &depends = {}) { return py_int::py_binary_ufunc( src1, src2, dst, exec_q, depends, output_typeid_vector, contig_dispatch_vector, @@ -155,8 +156,8 @@ void init_mul(py::module_ m) py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), py::arg("dst"), py::arg("depends") = py::list()); - auto mul_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src1, - arrayT src2, arrayT dst) { + auto mul_need_to_call_pyapi = [&](sycl::queue &exec_q, const arrayT &src1, + const arrayT &src2, const arrayT &dst) { return py_internal::need_to_call_binary_ufunc(exec_q, src1, src2, dst, output_typeid_vector, contig_dispatch_vector); diff --git a/dpnp/backend/extensions/vm/pow.cpp b/dpnp/backend/extensions/vm/pow.cpp index f0d4948e5dba..65acd2ece44b 100644 --- a/dpnp/backend/extensions/vm/pow.cpp +++ b/dpnp/backend/extensions/vm/pow.cpp @@ -133,8 +133,9 @@ void init_pow(py::module_ m) using impl::contig_dispatch_vector; using impl::output_typeid_vector; - auto pow_pyapi = [&](sycl::queue exec_q, arrayT src1, arrayT src2, - arrayT dst, const event_vecT &depends = {}) { + auto pow_pyapi = [&](sycl::queue &exec_q, const arrayT &src1, + const arrayT &src2, const arrayT &dst, + const event_vecT &depends = {}) { return py_int::py_binary_ufunc( src1, src2, dst, exec_q, depends, output_typeid_vector, contig_dispatch_vector, @@ -155,8 +156,8 @@ void init_pow(py::module_ m) py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), py::arg("dst"), py::arg("depends") = py::list()); - auto pow_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src1, - arrayT src2, arrayT dst) { + auto pow_need_to_call_pyapi = [&](sycl::queue &exec_q, const arrayT &src1, + const arrayT &src2, const arrayT &dst) { return py_internal::need_to_call_binary_ufunc(exec_q, 
src1, src2, dst, output_typeid_vector, contig_dispatch_vector); diff --git a/dpnp/backend/extensions/vm/rint.cpp b/dpnp/backend/extensions/vm/rint.cpp index e33a866f6cab..ee0edbecd23e 100644 --- a/dpnp/backend/extensions/vm/rint.cpp +++ b/dpnp/backend/extensions/vm/rint.cpp @@ -109,8 +109,8 @@ void init_rint(py::module_ m) using impl::contig_dispatch_vector; using impl::output_typeid_vector; - auto rint_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { + auto rint_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst, const event_vecT &depends = {}) { return py_int::py_unary_ufunc( src, dst, exec_q, depends, output_typeid_vector, contig_dispatch_vector, @@ -123,8 +123,8 @@ void init_rint(py::module_ m) py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), py::arg("depends") = py::list()); - auto rint_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { + auto rint_need_to_call_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst) { return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; diff --git a/dpnp/backend/extensions/vm/sin.cpp b/dpnp/backend/extensions/vm/sin.cpp index f3aff6b21fcd..55d9f8ed301e 100644 --- a/dpnp/backend/extensions/vm/sin.cpp +++ b/dpnp/backend/extensions/vm/sin.cpp @@ -111,8 +111,8 @@ void init_sin(py::module_ m) using impl::contig_dispatch_vector; using impl::output_typeid_vector; - auto sin_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { + auto sin_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst, const event_vecT &depends = {}) { return py_int::py_unary_ufunc( src, dst, exec_q, depends, output_typeid_vector, contig_dispatch_vector, @@ -125,8 +125,8 @@ void init_sin(py::module_ m) py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), py::arg("depends") = py::list()); - auto sin_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { + auto sin_need_to_call_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst) { return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; diff --git a/dpnp/backend/extensions/vm/sinh.cpp b/dpnp/backend/extensions/vm/sinh.cpp index 708b2fc97362..f8ddbc580ebc 100644 --- a/dpnp/backend/extensions/vm/sinh.cpp +++ b/dpnp/backend/extensions/vm/sinh.cpp @@ -111,8 +111,8 @@ void init_sinh(py::module_ m) using impl::contig_dispatch_vector; using impl::output_typeid_vector; - auto sinh_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { + auto sinh_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst, const event_vecT &depends = {}) { return py_int::py_unary_ufunc( src, dst, exec_q, depends, output_typeid_vector, contig_dispatch_vector, @@ -125,8 +125,8 @@ void init_sinh(py::module_ m) py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), py::arg("depends") = py::list()); - auto sinh_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { + auto sinh_need_to_call_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst) { return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; diff --git a/dpnp/backend/extensions/vm/sqr.cpp b/dpnp/backend/extensions/vm/sqr.cpp index da603d320167..f42427ea00fc 100644 --- a/dpnp/backend/extensions/vm/sqr.cpp +++ b/dpnp/backend/extensions/vm/sqr.cpp @@ 
-109,8 +109,8 @@ void init_sqr(py::module_ m) using impl::contig_dispatch_vector; using impl::output_typeid_vector; - auto sqr_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { + auto sqr_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst, const event_vecT &depends = {}) { return py_int::py_unary_ufunc( src, dst, exec_q, depends, output_typeid_vector, contig_dispatch_vector, @@ -123,8 +123,8 @@ void init_sqr(py::module_ m) py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), py::arg("depends") = py::list()); - auto sqr_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { + auto sqr_need_to_call_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst) { return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; diff --git a/dpnp/backend/extensions/vm/sqrt.cpp b/dpnp/backend/extensions/vm/sqrt.cpp index 40773ee8d645..70ebbf298fd3 100644 --- a/dpnp/backend/extensions/vm/sqrt.cpp +++ b/dpnp/backend/extensions/vm/sqrt.cpp @@ -111,8 +111,8 @@ void init_sqrt(py::module_ m) using impl::contig_dispatch_vector; using impl::output_typeid_vector; - auto sqrt_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { + auto sqrt_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst, const event_vecT &depends = {}) { return py_int::py_unary_ufunc( src, dst, exec_q, depends, output_typeid_vector, contig_dispatch_vector, @@ -126,8 +126,8 @@ void init_sqrt(py::module_ m) py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), py::arg("depends") = py::list()); - auto sqrt_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { + auto sqrt_need_to_call_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst) { return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; diff --git a/dpnp/backend/extensions/vm/sub.cpp b/dpnp/backend/extensions/vm/sub.cpp index 46add7e75d78..4ec1bdc36b50 100644 --- a/dpnp/backend/extensions/vm/sub.cpp +++ b/dpnp/backend/extensions/vm/sub.cpp @@ -133,8 +133,9 @@ void init_sub(py::module_ m) using impl::contig_dispatch_vector; using impl::output_typeid_vector; - auto sub_pyapi = [&](sycl::queue exec_q, arrayT src1, arrayT src2, - arrayT dst, const event_vecT &depends = {}) { + auto sub_pyapi = [&](sycl::queue &exec_q, const arrayT &src1, + const arrayT &src2, const arrayT &dst, + const event_vecT &depends = {}) { return py_int::py_binary_ufunc( src1, src2, dst, exec_q, depends, output_typeid_vector, contig_dispatch_vector, @@ -155,8 +156,8 @@ void init_sub(py::module_ m) py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), py::arg("dst"), py::arg("depends") = py::list()); - auto sub_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src1, - arrayT src2, arrayT dst) { + auto sub_need_to_call_pyapi = [&](sycl::queue &exec_q, const arrayT &src1, + const arrayT &src2, const arrayT &dst) { return py_internal::need_to_call_binary_ufunc(exec_q, src1, src2, dst, output_typeid_vector, contig_dispatch_vector); diff --git a/dpnp/backend/extensions/vm/tan.cpp b/dpnp/backend/extensions/vm/tan.cpp index 72ccad0d5192..250c38387227 100644 --- a/dpnp/backend/extensions/vm/tan.cpp +++ b/dpnp/backend/extensions/vm/tan.cpp @@ -111,8 +111,8 @@ void init_tan(py::module_ m) using impl::contig_dispatch_vector; using impl::output_typeid_vector; - auto tan_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - 
const event_vecT &depends = {}) { + auto tan_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst, const event_vecT &depends = {}) { return py_int::py_unary_ufunc( src, dst, exec_q, depends, output_typeid_vector, contig_dispatch_vector, @@ -125,8 +125,8 @@ void init_tan(py::module_ m) py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), py::arg("depends") = py::list()); - auto tan_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { + auto tan_need_to_call_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst) { return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; diff --git a/dpnp/backend/extensions/vm/tanh.cpp b/dpnp/backend/extensions/vm/tanh.cpp index 933d507758f4..d0e9ecc1669a 100644 --- a/dpnp/backend/extensions/vm/tanh.cpp +++ b/dpnp/backend/extensions/vm/tanh.cpp @@ -111,8 +111,8 @@ void init_tanh(py::module_ m) using impl::contig_dispatch_vector; using impl::output_typeid_vector; - auto tanh_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { + auto tanh_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst, const event_vecT &depends = {}) { return py_int::py_unary_ufunc( src, dst, exec_q, depends, output_typeid_vector, contig_dispatch_vector, @@ -125,8 +125,8 @@ void init_tanh(py::module_ m) py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), py::arg("depends") = py::list()); - auto tanh_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { + auto tanh_need_to_call_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst) { return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; diff --git a/dpnp/backend/extensions/vm/trunc.cpp b/dpnp/backend/extensions/vm/trunc.cpp index 5ad79a17c23e..f47da825719c 100644 --- a/dpnp/backend/extensions/vm/trunc.cpp +++ b/dpnp/backend/extensions/vm/trunc.cpp @@ -109,8 +109,8 @@ void init_trunc(py::module_ m) using impl::contig_dispatch_vector; using impl::output_typeid_vector; - auto trunc_pyapi = [&](sycl::queue exec_q, arrayT src, arrayT dst, - const event_vecT &depends = {}) { + auto trunc_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst, const event_vecT &depends = {}) { return py_int::py_unary_ufunc( src, dst, exec_q, depends, output_typeid_vector, contig_dispatch_vector, @@ -123,8 +123,8 @@ void init_trunc(py::module_ m) py::arg("sycl_queue"), py::arg("src"), py::arg("dst"), py::arg("depends") = py::list()); - auto trunc_need_to_call_pyapi = [&](sycl::queue exec_q, arrayT src, - arrayT dst) { + auto trunc_need_to_call_pyapi = [&](sycl::queue &exec_q, const arrayT &src, + const arrayT &dst) { return py_internal::need_to_call_unary_ufunc( exec_q, src, dst, output_typeid_vector, contig_dispatch_vector); }; From b901b5b881767719e6f87208f32786a8a65371f6 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Tue, 4 Jun 2024 13:15:31 +0200 Subject: [PATCH 22/35] Implement dpnp.fabs function --- dpnp/CMakeLists.txt | 1 + dpnp/backend/extensions/ufunc/CMakeLists.txt | 80 ++++++++ .../ufunc/elementwise_functions/common.cpp | 41 ++++ .../ufunc/elementwise_functions/common.hpp | 36 ++++ .../ufunc/elementwise_functions/fabs.cpp | 117 +++++++++++ .../ufunc/elementwise_functions/fabs.hpp | 35 ++++ dpnp/backend/extensions/ufunc/ufunc_py.cpp | 35 ++++ .../kernels/elementwise_functions/fabs.hpp | 184 ++++++++++++++++++ 8 files changed, 529 insertions(+) create mode 100644 
dpnp/backend/extensions/ufunc/CMakeLists.txt create mode 100644 dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp create mode 100644 dpnp/backend/extensions/ufunc/elementwise_functions/common.hpp create mode 100644 dpnp/backend/extensions/ufunc/elementwise_functions/fabs.cpp create mode 100644 dpnp/backend/extensions/ufunc/elementwise_functions/fabs.hpp create mode 100644 dpnp/backend/extensions/ufunc/ufunc_py.cpp create mode 100644 dpnp/backend/kernels/elementwise_functions/fabs.hpp diff --git a/dpnp/CMakeLists.txt b/dpnp/CMakeLists.txt index 9c79d5af385e..d9c95b62c0be 100644 --- a/dpnp/CMakeLists.txt +++ b/dpnp/CMakeLists.txt @@ -60,6 +60,7 @@ add_subdirectory(backend/extensions/blas) add_subdirectory(backend/extensions/lapack) add_subdirectory(backend/extensions/vm) add_subdirectory(backend/extensions/sycl_ext) +add_subdirectory(backend/extensions/ufunc) add_subdirectory(dpnp_algo) add_subdirectory(dpnp_utils) diff --git a/dpnp/backend/extensions/ufunc/CMakeLists.txt b/dpnp/backend/extensions/ufunc/CMakeLists.txt new file mode 100644 index 000000000000..a0b8bad02025 --- /dev/null +++ b/dpnp/backend/extensions/ufunc/CMakeLists.txt @@ -0,0 +1,80 @@ +# ***************************************************************************** +# Copyright (c) 2024, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. +# ***************************************************************************** + +set(_elementwise_sources + ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/fabs.cpp +) + +set(python_module_name _ufunc_impl) +set(_module_src + ${CMAKE_CURRENT_SOURCE_DIR}/ufunc_py.cpp + ${_elementwise_sources} +) + +pybind11_add_module(${python_module_name} MODULE ${_module_src}) +add_sycl_to_target(TARGET ${python_module_name} SOURCES ${_module_src}) + +if (WIN32) + if (${CMAKE_VERSION} VERSION_LESS "3.27") + # this is a work-around for target_link_options inserting option after -link option, cause + # linker to ignore it. 
+ set(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -fsycl-device-code-split=per_kernel") + endif() +endif() + +set_target_properties(${python_module_name} PROPERTIES CMAKE_POSITION_INDEPENDENT_CODE ON) + +target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../) + +target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIR}) +target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) + +if (WIN32) + target_compile_options(${python_module_name} PRIVATE + /clang:-fno-approx-func + /clang:-fno-finite-math-only + ) +else() + target_compile_options(${python_module_name} PRIVATE + -fno-approx-func + -fno-finite-math-only + ) +endif() + +target_link_options(${python_module_name} PUBLIC -fsycl-device-code-split=per_kernel) + +if (DPNP_GENERATE_COVERAGE) + target_link_options(${python_module_name} PRIVATE -fprofile-instr-generate -fcoverage-mapping) +endif() + +# if (MKL_VERSION_2024) +# target_link_libraries(${python_module_name} PUBLIC MKL::MKL_SYCL::VM) +# else() +# target_link_libraries(${python_module_name} PUBLIC MKL::MKL_DPCPP) +# endif() + +install(TARGETS ${python_module_name} + DESTINATION "dpnp/backend/extensions/ufunc" +) diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp new file mode 100644 index 000000000000..54676f12dc9b --- /dev/null +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp @@ -0,0 +1,41 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include + +#include "fabs.hpp" + +namespace py = pybind11; + +namespace dpnp::backend::ext::ufunc +{ + +/*! 
@brief Add elementwise functions to Python module */ +void init_elementwise_functions(py::module_ m) +{ + init_fabs(m); +} + +} // namespace dpnp::backend::ext::ufunc diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/common.hpp b/dpnp/backend/extensions/ufunc/elementwise_functions/common.hpp new file mode 100644 index 000000000000..028400e16f2d --- /dev/null +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/common.hpp @@ -0,0 +1,36 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include + +namespace py = pybind11; + +namespace dpnp::backend::ext::ufunc +{ +extern void init_elementwise_functions(py::module_); + +} // namespace dpnp::backend::ext::ufunc diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/fabs.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/fabs.cpp new file mode 100644 index 000000000000..b5a89087f508 --- /dev/null +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/fabs.cpp @@ -0,0 +1,117 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "fabs.hpp" +// #include "elementwise_functions.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/fabs.hpp" + +// dpctl tensor headers +dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +#include "kernels/elementwise_functions/common.hpp" + +namespace py = pybind11; + +namespace dpnp::backend::ext::ufunc +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace py_int = dpctl::tensor::py_internal; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +namespace impl +{ + +namespace fabs_fn_ns = dpnp::backend::kernels::fabs; + +static unary_contig_impl_fn_ptr_t fabs_contig_dispatch_vector[td_ns::num_types]; +static int fabs_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + fabs_strided_dispatch_vector[td_ns::num_types]; + +void populate_fabs_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = fabs_fn_ns; + + using fn_ns::FabsContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(fabs_contig_dispatch_vector); + + using fn_ns::FabsStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(fabs_strided_dispatch_vector); + + using fn_ns::FabsTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(fabs_output_typeid_vector); +}; + +} // namespace impl + +void init_fabs(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_fabs_dispatch_vectors(); + using impl::fabs_contig_dispatch_vector; + using impl::fabs_output_typeid_vector; + using impl::fabs_strided_dispatch_vector; + + auto fabs_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_int::py_unary_ufunc( + src, dst, exec_q, depends, fabs_output_typeid_vector, + fabs_contig_dispatch_vector, fabs_strided_dispatch_vector); + }; + m.def("_fabs", fabs_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto fabs_result_type_pyapi = [&](const py::dtype &dtype) { + return py_int::py_unary_ufunc_result_type(dtype, fabs_output_typeid_vector); + }; + m.def("_fabs_result_type", fabs_result_type_pyapi); + } +} + +} // namespace dpnp::backend::ext::ufunc diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/fabs.hpp b/dpnp/backend/extensions/ufunc/elementwise_functions/fabs.hpp new file mode 100644 index 000000000000..2cf94dd7a87b --- /dev/null +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/fabs.hpp @@ -0,0 +1,35 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights 
reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once +#include + +namespace py = pybind11; + +namespace dpnp::backend::ext::ufunc +{ +extern void init_fabs(py::module_ m); + +} // namespace dpnp::backend::ext::ufunc diff --git a/dpnp/backend/extensions/ufunc/ufunc_py.cpp b/dpnp/backend/extensions/ufunc/ufunc_py.cpp new file mode 100644 index 000000000000..9477269f7d97 --- /dev/null +++ b/dpnp/backend/extensions/ufunc/ufunc_py.cpp @@ -0,0 +1,35 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#include + +#include "elementwise_functions/common.hpp" + +namespace py = pybind11; + +PYBIND11_MODULE(_ufunc_impl, m) +{ + dpnp::backend::ext::ufunc::init_elementwise_functions(m); +} \ No newline at end of file diff --git a/dpnp/backend/kernels/elementwise_functions/fabs.hpp b/dpnp/backend/kernels/elementwise_functions/fabs.hpp new file mode 100644 index 000000000000..638dd05fc609 --- /dev/null +++ b/dpnp/backend/kernels/elementwise_functions/fabs.hpp @@ -0,0 +1,184 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#pragma once + +#include + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::backend::kernels::fabs +{ + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace td_ns = dpctl::tensor::type_dispatch; + +using dpctl::tensor::type_utils::is_complex; + +template struct FabsFunctor +{ + + using is_constant = typename std::false_type; + // constexpr resT constant_value = resT{}; + using supports_vec = typename std::false_type; + using supports_sg_loadstore = typename std::negation< + std::disjunction, is_complex>>; + + resT operator()(const argT &x) const + { + + // if constexpr (std::is_same_v || (std::is_integral::value && std::is_unsigned::value)) + // { + // return resT(x); + // } + // else if constexpr (std::is_same_v || std::is_floating_point_v) + // { + // return sycl::fabs(x); + // } + // else // std::is_integral::value + // { + // return resT(sycl::abs(x)); + // } + return sycl::fabs(x); + } +}; + +template +using FabsContigFunctor = ew_cmn_ns::UnaryContigFunctor, + vec_sz, + n_vecs, + enable_sg_loadstore>; + +template struct FabsOutputType +{ + using value_type = typename std::disjunction< // disjunction is C++17 + // feature, supported by DPC++ + // td_ns::TypeMapResultEntry, + // td_ns::TypeMapResultEntry, + // td_ns::TypeMapResultEntry, + // td_ns::TypeMapResultEntry, + // td_ns::TypeMapResultEntry, + // td_ns::TypeMapResultEntry, + // td_ns::TypeMapResultEntry, + // td_ns::TypeMapResultEntry, + // td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + // td_ns::TypeMapResultEntry, float>, + // td_ns::TypeMapResultEntry, double>, + td_ns::DefaultResultEntry>::result_type; +}; + +template +class fabs_contig_kernel; + +template +sycl::event fabs_contig_impl(sycl::queue &exec_q, + size_t nelems, + const char *arg_p, + char *res_p, + const std::vector &depends = {}) +{ + return ew_cmn_ns::unary_contig_impl< + argTy, FabsOutputType, FabsContigFunctor, fabs_contig_kernel>( + exec_q, nelems, arg_p, res_p, depends); +} + +template struct FabsContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = fabs_contig_impl; + return fn; + } + } +}; + +template struct FabsTypeMapFactory +{ + /*! 
@brief get typeid for output type of fabs(T x) */ + std::enable_if_t::value, int> get() + { + using rT = typename FabsOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template +using FabsStridedFunctor = ew_cmn_ns:: + UnaryStridedFunctor>; + +template class fabs_strided_kernel; + +template +sycl::event fabs_strided_impl(sycl::queue &exec_q, + size_t nelems, + int nd, + const ssize_t *shape_and_strides, + const char *arg_p, + ssize_t arg_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return ew_cmn_ns::unary_strided_impl< + argTy, FabsOutputType, FabsStridedFunctor, fabs_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p, + res_offset, depends, additional_depends); +} + +template struct FabsStridedFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = fabs_strided_impl; + return fn; + } + } +}; + +} // namespace dpnp::backend::kernels::fabs From 167fa47927d183f8f918c0dd28dab28fe3228808 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 12 Jun 2024 13:36:15 +0200 Subject: [PATCH 23/35] Create an instance of DPNPUnaryFunc for fabs --- dpnp/backend/extensions/ufunc/CMakeLists.txt | 11 +- .../ufunc/elementwise_functions/common.cpp | 10 +- .../ufunc/elementwise_functions/common.hpp | 7 +- .../ufunc/elementwise_functions/fabs.cpp | 36 +++---- .../ufunc/elementwise_functions/fabs.hpp | 8 +- dpnp/backend/extensions/ufunc/ufunc_py.cpp | 5 +- dpnp/backend/include/dpnp_iface_fptr.hpp | 28 +++-- dpnp/backend/kernels/dpnp_krnl_elemwise.cpp | 9 -- .../kernels/elementwise_functions/fabs.hpp | 101 ++++++++---------- dpnp/dpnp_algo/dpnp_algo.pxd | 1 - dpnp/dpnp_algo/dpnp_algo_mathematical.pxi | 5 - dpnp/dpnp_iface_mathematical.py | 67 +++++++----- 12 files changed, 132 insertions(+), 156 deletions(-) diff --git a/dpnp/backend/extensions/ufunc/CMakeLists.txt b/dpnp/backend/extensions/ufunc/CMakeLists.txt index a0b8bad02025..7f9a240271b1 100644 --- a/dpnp/backend/extensions/ufunc/CMakeLists.txt +++ b/dpnp/backend/extensions/ufunc/CMakeLists.txt @@ -24,11 +24,16 @@ # ***************************************************************************** set(_elementwise_sources + ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/common.cpp ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/fabs.cpp ) set(python_module_name _ufunc_impl) + set(_module_src + # TODO: remove sources from `elementwise_functions` folder + ${CMAKE_CURRENT_SOURCE_DIR}/../elementwise_functions/elementwise_functions_type_utils.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../elementwise_functions/simplify_iteration_space.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ufunc_py.cpp ${_elementwise_sources} ) @@ -69,12 +74,6 @@ if (DPNP_GENERATE_COVERAGE) target_link_options(${python_module_name} PRIVATE -fprofile-instr-generate -fcoverage-mapping) endif() -# if (MKL_VERSION_2024) -# target_link_libraries(${python_module_name} PUBLIC MKL::MKL_SYCL::VM) -# else() -# target_link_libraries(${python_module_name} PUBLIC MKL::MKL_DPCPP) -# endif() - install(TARGETS ${python_module_name} DESTINATION "dpnp/backend/extensions/ufunc" ) diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp index 54676f12dc9b..44173fc764fe 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp @@ -29,13 +29,13 @@ namespace 
py = pybind11; -namespace dpnp::backend::ext::ufunc +namespace dpnp::extensions::ufunc { - -/*! @brief Add elementwise functions to Python module */ +/** + * @brief Add elementwise functions to Python module + */ void init_elementwise_functions(py::module_ m) { init_fabs(m); } - -} // namespace dpnp::backend::ext::ufunc +} // namespace dpnp::extensions::ufunc diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/common.hpp b/dpnp/backend/extensions/ufunc/elementwise_functions/common.hpp index 028400e16f2d..345ff14308e6 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/common.hpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/common.hpp @@ -29,8 +29,7 @@ namespace py = pybind11; -namespace dpnp::backend::ext::ufunc +namespace dpnp::extensions::ufunc { -extern void init_elementwise_functions(py::module_); - -} // namespace dpnp::backend::ext::ufunc +void init_elementwise_functions(py::module_); +} // namespace dpnp::extensions::ufunc diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/fabs.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/fabs.cpp index b5a89087f508..11c6a3ad065b 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/fabs.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/fabs.cpp @@ -23,40 +23,39 @@ // THE POSSIBILITY OF SUCH DAMAGE. //***************************************************************************** -#include "dpctl4pybind11.hpp" -#include -#include -#include #include -#include -#include "fabs.hpp" -// #include "elementwise_functions.hpp" -#include "utils/type_dispatch.hpp" +#include "dpctl4pybind11.hpp" +#include "fabs.hpp" #include "kernels/elementwise_functions/fabs.hpp" +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../../elementwise_functions/elementwise_functions.hpp" + // dpctl tensor headers -dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp #include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" namespace py = pybind11; -namespace dpnp::backend::ext::ufunc +namespace dpnp::extensions::ufunc { - +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py_int = dpnp::extensions::py_internal; namespace td_ns = dpctl::tensor::type_dispatch; -namespace py_int = dpctl::tensor::py_internal; - -namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; using ew_cmn_ns::unary_contig_impl_fn_ptr_t; using ew_cmn_ns::unary_strided_impl_fn_ptr_t; namespace impl { +namespace fabs_fn_ns = dpnp::kernels::fabs; -namespace fabs_fn_ns = dpnp::backend::kernels::fabs; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; static unary_contig_impl_fn_ptr_t fabs_contig_dispatch_vector[td_ns::num_types]; static int fabs_output_typeid_vector[td_ns::num_types]; @@ -84,7 +83,6 @@ void populate_fabs_dispatch_vectors(void) DispatchVectorBuilder dvb3; dvb3.populate_dispatch_vector(fabs_output_typeid_vector); }; - } // namespace impl void init_fabs(py::module_ m) @@ -108,10 +106,10 @@ void init_fabs(py::module_ m) py::arg("sycl_queue"), py::arg("depends") = py::list()); auto fabs_result_type_pyapi = [&](const py::dtype &dtype) { - return py_int::py_unary_ufunc_result_type(dtype, fabs_output_typeid_vector); + return py_int::py_unary_ufunc_result_type( + dtype, fabs_output_typeid_vector); }; 
m.def("_fabs_result_type", fabs_result_type_pyapi); } } - -} // namespace dpnp::backend::ext::ufunc +} // namespace dpnp::extensions::ufunc diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/fabs.hpp b/dpnp/backend/extensions/ufunc/elementwise_functions/fabs.hpp index 2cf94dd7a87b..f4a070747ac2 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/fabs.hpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/fabs.hpp @@ -24,12 +24,12 @@ //***************************************************************************** #pragma once + #include namespace py = pybind11; -namespace dpnp::backend::ext::ufunc +namespace dpnp::extensions::ufunc { -extern void init_fabs(py::module_ m); - -} // namespace dpnp::backend::ext::ufunc +void init_fabs(py::module_ m); +} // namespace dpnp::extensions::ufunc diff --git a/dpnp/backend/extensions/ufunc/ufunc_py.cpp b/dpnp/backend/extensions/ufunc/ufunc_py.cpp index 9477269f7d97..3618bce2cec5 100644 --- a/dpnp/backend/extensions/ufunc/ufunc_py.cpp +++ b/dpnp/backend/extensions/ufunc/ufunc_py.cpp @@ -28,8 +28,9 @@ #include "elementwise_functions/common.hpp" namespace py = pybind11; +namespace ufunc_ns = dpnp::extensions::ufunc; PYBIND11_MODULE(_ufunc_impl, m) { - dpnp::backend::ext::ufunc::init_elementwise_functions(m); -} \ No newline at end of file + ufunc_ns::init_elementwise_functions(m); +} diff --git a/dpnp/backend/include/dpnp_iface_fptr.hpp b/dpnp/backend/include/dpnp_iface_fptr.hpp index d8e6f8b26e81..0f6ef51bc7ce 100644 --- a/dpnp/backend/include/dpnp_iface_fptr.hpp +++ b/dpnp/backend/include/dpnp_iface_fptr.hpp @@ -117,21 +117,19 @@ enum class DPNPFuncName : size_t DPNP_FN_DOT, /**< Used in numpy.dot() impl */ DPNP_FN_DOT_EXT, /**< Used in numpy.dot() impl, requires extra parameters */ DPNP_FN_EDIFF1D, /**< Used in numpy.ediff1d() impl */ - DPNP_FN_EDIFF1D_EXT, /**< Used in numpy.ediff1d() impl, requires extra - parameters */ - DPNP_FN_EIG, /**< Used in numpy.linalg.eig() impl */ - DPNP_FN_EIGVALS, /**< Used in numpy.linalg.eigvals() impl */ - DPNP_FN_ERF, /**< Used in scipy.special.erf impl */ - DPNP_FN_ERF_EXT, /**< Used in scipy.special.erf impl, requires extra - parameters */ - DPNP_FN_EYE, /**< Used in numpy.eye() impl */ - DPNP_FN_EXP, /**< Used in numpy.exp() impl */ - DPNP_FN_EXP2, /**< Used in numpy.exp2() impl */ - DPNP_FN_EXPM1, /**< Used in numpy.expm1() impl */ - DPNP_FN_FABS, /**< Used in numpy.fabs() impl */ - DPNP_FN_FABS_EXT, /**< Used in numpy.fabs() impl, requires extra parameters - */ - DPNP_FN_FFT_FFT, /**< Used in numpy.fft.fft() impl */ + DPNP_FN_EDIFF1D_EXT, /**< Used in numpy.ediff1d() impl, requires extra + parameters */ + DPNP_FN_EIG, /**< Used in numpy.linalg.eig() impl */ + DPNP_FN_EIGVALS, /**< Used in numpy.linalg.eigvals() impl */ + DPNP_FN_ERF, /**< Used in scipy.special.erf impl */ + DPNP_FN_ERF_EXT, /**< Used in scipy.special.erf impl, requires extra + parameters */ + DPNP_FN_EYE, /**< Used in numpy.eye() impl */ + DPNP_FN_EXP, /**< Used in numpy.exp() impl */ + DPNP_FN_EXP2, /**< Used in numpy.exp2() impl */ + DPNP_FN_EXPM1, /**< Used in numpy.expm1() impl */ + DPNP_FN_FABS, /**< Used in numpy.fabs() impl */ + DPNP_FN_FFT_FFT, /**< Used in numpy.fft.fft() impl */ DPNP_FN_FFT_FFT_EXT, /**< Used in numpy.fft.fft() impl, requires extra parameters */ DPNP_FN_FFT_RFFT, /**< Used in numpy.fft.rfft() impl */ diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp index a69a875fc1e7..122a3ccdedd3 100644 --- 
a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp @@ -462,15 +462,6 @@ static void func_map_init_elemwise_1arg_2type(func_map_t &fmap) fmap[DPNPFuncName::DPNP_FN_FABS][eft_DBL][eft_DBL] = { eft_DBL, (void *)dpnp_fabs_c_default}; - fmap[DPNPFuncName::DPNP_FN_FABS_EXT][eft_INT][eft_INT] = { - eft_DBL, (void *)dpnp_fabs_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FABS_EXT][eft_LNG][eft_LNG] = { - eft_DBL, (void *)dpnp_fabs_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FABS_EXT][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_fabs_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FABS_EXT][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_fabs_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FLOOR][eft_INT][eft_INT] = { eft_DBL, (void *)dpnp_floor_c_default}; fmap[DPNPFuncName::DPNP_FN_FLOOR][eft_LNG][eft_LNG] = { diff --git a/dpnp/backend/kernels/elementwise_functions/fabs.hpp b/dpnp/backend/kernels/elementwise_functions/fabs.hpp index 638dd05fc609..22072d5f945b 100644 --- a/dpnp/backend/kernels/elementwise_functions/fabs.hpp +++ b/dpnp/backend/kernels/elementwise_functions/fabs.hpp @@ -29,40 +29,27 @@ // dpctl tensor headers #include "kernels/elementwise_functions/common.hpp" -#include "utils/type_utils.hpp" +#include "utils/type_dispatch.hpp" -namespace dpnp::backend::kernels::fabs +namespace dpnp::kernels::fabs { - namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; namespace td_ns = dpctl::tensor::type_dispatch; -using dpctl::tensor::type_utils::is_complex; - -template struct FabsFunctor +template +struct FabsFunctor { - + // is function constant for given argT using is_constant = typename std::false_type; + // constant value, if constant // constexpr resT constant_value = resT{}; + // is function defined for sycl::vec using supports_vec = typename std::false_type; - using supports_sg_loadstore = typename std::negation< - std::disjunction, is_complex>>; + // do both argT and resT support sugroup store/load operation + using supports_sg_loadstore = typename std::true_type; resT operator()(const argT &x) const { - - // if constexpr (std::is_same_v || (std::is_integral::value && std::is_unsigned::value)) - // { - // return resT(x); - // } - // else if constexpr (std::is_same_v || std::is_floating_point_v) - // { - // return sycl::fabs(x); - // } - // else // std::is_integral::value - // { - // return resT(sycl::abs(x)); - // } return sycl::fabs(x); } }; @@ -73,30 +60,20 @@ template using FabsContigFunctor = ew_cmn_ns::UnaryContigFunctor, - vec_sz, - n_vecs, - enable_sg_loadstore>; - -template struct FabsOutputType + resT, + FabsFunctor, + vec_sz, + n_vecs, + enable_sg_loadstore>; + +template +struct FabsOutputType { using value_type = typename std::disjunction< // disjunction is C++17 // feature, supported by DPC++ - // td_ns::TypeMapResultEntry, - // td_ns::TypeMapResultEntry, - // td_ns::TypeMapResultEntry, - // td_ns::TypeMapResultEntry, - // td_ns::TypeMapResultEntry, - // td_ns::TypeMapResultEntry, - // td_ns::TypeMapResultEntry, - // td_ns::TypeMapResultEntry, - // td_ns::TypeMapResultEntry, td_ns::TypeMapResultEntry, td_ns::TypeMapResultEntry, td_ns::TypeMapResultEntry, - // td_ns::TypeMapResultEntry, float>, - // td_ns::TypeMapResultEntry, double>, td_ns::DefaultResultEntry>::result_type; }; @@ -110,12 +87,13 @@ sycl::event fabs_contig_impl(sycl::queue &exec_q, char *res_p, const std::vector &depends = {}) { - return ew_cmn_ns::unary_contig_impl< - argTy, FabsOutputType, FabsContigFunctor, fabs_contig_kernel>( + return ew_cmn_ns::unary_contig_impl( exec_q, nelems, arg_p, 
res_p, depends); } -template struct FabsContigFactory +template +struct FabsContigFactory { fnT get() { @@ -131,9 +109,12 @@ template struct FabsContigFactory } }; -template struct FabsTypeMapFactory +template +struct FabsTypeMapFactory { - /*! @brief get typeid for output type of fabs(T x) */ + /** + * @brief get typeid for output type of fabs(T x) + */ std::enable_if_t::value, int> get() { using rT = typename FabsOutputType::value_type; @@ -145,19 +126,21 @@ template using FabsStridedFunctor = ew_cmn_ns:: UnaryStridedFunctor>; -template class fabs_strided_kernel; +template +class fabs_strided_kernel; template -sycl::event fabs_strided_impl(sycl::queue &exec_q, - size_t nelems, - int nd, - const ssize_t *shape_and_strides, - const char *arg_p, - ssize_t arg_offset, - char *res_p, - ssize_t res_offset, - const std::vector &depends, - const std::vector &additional_depends) +sycl::event + fabs_strided_impl(sycl::queue &exec_q, + size_t nelems, + int nd, + const ssize_t *shape_and_strides, + const char *arg_p, + ssize_t arg_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) { return ew_cmn_ns::unary_strided_impl< argTy, FabsOutputType, FabsStridedFunctor, fabs_strided_kernel>( @@ -165,7 +148,8 @@ sycl::event fabs_strided_impl(sycl::queue &exec_q, res_offset, depends, additional_depends); } -template struct FabsStridedFactory +template +struct FabsStridedFactory { fnT get() { @@ -180,5 +164,4 @@ template struct FabsStridedFactory } } }; - -} // namespace dpnp::backend::kernels::fabs +} // namespace dpnp::kernels::fabs diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd index a82a96ed0c59..f6df42981a9f 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pxd +++ b/dpnp/dpnp_algo/dpnp_algo.pxd @@ -40,7 +40,6 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_DEGREES_EXT DPNP_FN_EDIFF1D_EXT DPNP_FN_ERF_EXT - DPNP_FN_FABS_EXT DPNP_FN_FFT_FFT_EXT DPNP_FN_FFT_RFFT_EXT DPNP_FN_FMOD_EXT diff --git a/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi b/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi index 2b8d63c6d2dd..405037da7829 100644 --- a/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi +++ b/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi @@ -37,7 +37,6 @@ and the rest of the library __all__ += [ "dpnp_ediff1d", - "dpnp_fabs", "dpnp_fmod", "dpnp_fmax", "dpnp_fmin", @@ -110,10 +109,6 @@ cpdef utils.dpnp_descriptor dpnp_ediff1d(utils.dpnp_descriptor x1): return result -cpdef utils.dpnp_descriptor dpnp_fabs(utils.dpnp_descriptor x1): - return call_fptr_1in_1out_strides(DPNP_FN_FABS_EXT, x1) - - cpdef utils.dpnp_descriptor dpnp_fmod(utils.dpnp_descriptor x1_obj, utils.dpnp_descriptor x2_obj, object dtype=None, diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index b0d0c7b61237..d3c2d96c46ad 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -55,12 +55,12 @@ ) import dpnp +import dpnp.backend.extensions.ufunc._ufunc_impl as ufi import dpnp.backend.extensions.vm._vm_impl as vmi from .backend.extensions.sycl_ext import _sycl_ext_impl from .dpnp_algo import ( dpnp_ediff1d, - dpnp_fabs, dpnp_fmax, dpnp_fmin, dpnp_fmod, @@ -1347,39 +1347,52 @@ def ediff1d(x1, to_end=None, to_begin=None): return call_origin(numpy.ediff1d, x1, to_end=to_end, to_begin=to_begin) -def fabs(x1, **kwargs): - """ - Compute the absolute values element-wise. +_FABS_DOCSTRING = """ +Compute the absolute values element-wise. - For full documentation refer to :obj:`numpy.fabs`. 
+This function returns the absolute values (positive magnitude) of the data in +`x`. Complex values are not handled, use :obj:`dpnp.absolute` to find the +absolute values of complex data. - Limitations - ----------- - Parameter `x1` is supported as :class:`dpnp.ndarray`. - Keyword argument `kwargs` is currently unsupported. - Otherwise the function will be executed sequentially on CPU. - Input array data types are limited by supported DPNP :ref:`Data types`. +For full documentation refer to :obj:`numpy.fabs`. - See Also - -------- - :obj:`dpnp.absolute` : Calculate the absolute value element-wise. +Parameters +---------- +x : {dpnp.ndarray, usm_ndarray} + The array of numbers for which the absolute values are required. +out : {None, dpnp.ndarray, usm_ndarray}, optional + Output array to populate. + Array must have the correct shape and the expected data type. +order : {"C", "F", "A", "K"}, optional + Memory layout of the newly output array, if parameter `out` is ``None``. + Default: ``"K"``. - Examples - -------- - >>> import dpnp as np - >>> result = np.fabs(np.array([1, -2, 6, -9])) - >>> [x for x in result] - [1.0, 2.0, 6.0, 9.0] +Returns +------- +out : dpnp.ndarray + The absolute values of `x`, the returned values are always floats. + If `x` does not have a real-valued data type, the returned array + will have a data type that depends on the capabilities of the device + on which the array resides. - """ +See Also +-------- +:obj:`dpnp.absolute` : Absolute values including `complex` types. - x1_desc = dpnp.get_dpnp_descriptor( - x1, copy_when_strides=False, copy_when_nondefault_queue=False - ) - if x1_desc: - return dpnp_fabs(x1_desc).get_pyobj() +Examples +-------- +>>> import dpnp as np +>>> a = np.array([-1.2, 1.2]) +>>> np.fabs(a) +array([1.2, 1.2]) +""" - return call_origin(numpy.fabs, x1, **kwargs) +fabs = DPNPUnaryFunc( + "fabs", + ufi._fabs_result_type, + ufi._fabs, + _FABS_DOCSTRING, +) _FLOOR_DOCSTRING = """ From 18cdb609d2f8581149ad5c23bf45a41f902e9e32 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 12 Jun 2024 14:22:29 +0200 Subject: [PATCH 24/35] Enable and add relating tests --- dpnp/dpnp_iface_mathematical.py | 2 + tests/skipped_tests.tbl | 92 ------------------- tests/skipped_tests_gpu.tbl | 90 ------------------ tests/skipped_tests_gpu_no_fp64.tbl | 7 -- tests/test_usm_type.py | 1 + .../third_party/cupy/math_tests/test_misc.py | 10 +- 6 files changed, 10 insertions(+), 192 deletions(-) diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index d3c2d96c46ad..513abca394e5 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -1392,6 +1392,8 @@ def ediff1d(x1, to_end=None, to_begin=None): ufi._fabs_result_type, ufi._fabs, _FABS_DOCSTRING, + mkl_fn_to_call=vmi._mkl_abs_to_call, + mkl_impl_fn=vmi._abs, ) diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl index 5e012b3a4966..c86b0d848c5b 100644 --- a/tests/skipped_tests.tbl +++ b/tests/skipped_tests.tbl @@ -36,7 +36,6 @@ tests/third_party/cupy/fft_tests/test_fft.py::TestFftn_param_23_{axes=None, norm tests/third_party/intel/test_zero_copy_test1.py::test_dpnp_interaction_with_dpctl_memory tests/test_strides.py::test_strides_1arg[(10,)-None-degrees] -tests/test_strides.py::test_strides_1arg[(10,)-None-fabs] tests/test_strides.py::test_strides_1arg[(10,)-None-radians] tests/test_umath.py::test_umaths[('divmod', 'ii')] @@ -260,12 +259,6 @@ tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_nan_to_num_inf_ar 
tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_nan_to_num_broadcast[nan] tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_nan_to_num_broadcast[posinf] tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_nan_to_num_broadcast[neginf] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveInvalid::test_convolve_empty[_param_0_{mode='valid'}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveInvalid::test_convolve_empty[_param_1_{mode='same'}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveInvalid::test_convolve_empty[_param_2_{mode='full'}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveInvalid::test_convolve_ndim[_param_0_{mode='valid'}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveInvalid::test_convolve_ndim[_param_1_{mode='same'}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveInvalid::test_convolve_ndim[_param_2_{mode='full'}] tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_nan_to_num_scalar_nan tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_nan_to_num_copy @@ -292,91 +285,6 @@ tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_interp_inf_to_nan tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_heaviside tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_heaviside_nan_inf -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_0_{mode='valid', shape1=(), shape2=()}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_1_{mode='valid', shape1=(), shape2=(5,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_2_{mode='valid', shape1=(), shape2=(6,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_3_{mode='valid', shape1=(), shape2=(20,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_4_{mode='valid', shape1=(), shape2=(21,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_5_{mode='valid', shape1=(5,), shape2=()}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_6_{mode='valid', shape1=(5,), shape2=(5,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_7_{mode='valid', shape1=(5,), shape2=(6,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_8_{mode='valid', shape1=(5,), shape2=(20,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_9_{mode='valid', shape1=(5,), shape2=(21,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_10_{mode='valid', shape1=(6,), shape2=()}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_11_{mode='valid', shape1=(6,), shape2=(5,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_12_{mode='valid', shape1=(6,), shape2=(6,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_13_{mode='valid', shape1=(6,), shape2=(20,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_14_{mode='valid', shape1=(6,), shape2=(21,)}] 
-tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_15_{mode='valid', shape1=(20,), shape2=()}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_16_{mode='valid', shape1=(20,), shape2=(5,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_17_{mode='valid', shape1=(20,), shape2=(6,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_18_{mode='valid', shape1=(20,), shape2=(20,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_19_{mode='valid', shape1=(20,), shape2=(21,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_20_{mode='valid', shape1=(21,), shape2=()}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_21_{mode='valid', shape1=(21,), shape2=(5,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_22_{mode='valid', shape1=(21,), shape2=(6,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_23_{mode='valid', shape1=(21,), shape2=(20,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_24_{mode='valid', shape1=(21,), shape2=(21,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_25_{mode='same', shape1=(), shape2=()}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_26_{mode='same', shape1=(), shape2=(5,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_27_{mode='same', shape1=(), shape2=(6,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_28_{mode='same', shape1=(), shape2=(20,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_29_{mode='same', shape1=(), shape2=(21,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_30_{mode='same', shape1=(5,), shape2=()}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_31_{mode='same', shape1=(5,), shape2=(5,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_32_{mode='same', shape1=(5,), shape2=(6,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_33_{mode='same', shape1=(5,), shape2=(20,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_34_{mode='same', shape1=(5,), shape2=(21,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_35_{mode='same', shape1=(6,), shape2=()}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_36_{mode='same', shape1=(6,), shape2=(5,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_37_{mode='same', shape1=(6,), shape2=(6,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_38_{mode='same', shape1=(6,), shape2=(20,)}] 
-tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_39_{mode='same', shape1=(6,), shape2=(21,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_40_{mode='same', shape1=(20,), shape2=()}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_41_{mode='same', shape1=(20,), shape2=(5,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_42_{mode='same', shape1=(20,), shape2=(6,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_43_{mode='same', shape1=(20,), shape2=(20,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_44_{mode='same', shape1=(20,), shape2=(21,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_45_{mode='same', shape1=(21,), shape2=()}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_46_{mode='same', shape1=(21,), shape2=(5,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_47_{mode='same', shape1=(21,), shape2=(6,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_48_{mode='same', shape1=(21,), shape2=(20,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_49_{mode='same', shape1=(21,), shape2=(21,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_50_{mode='full', shape1=(), shape2=()}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_51_{mode='full', shape1=(), shape2=(5,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_52_{mode='full', shape1=(), shape2=(6,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_53_{mode='full', shape1=(), shape2=(20,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_54_{mode='full', shape1=(), shape2=(21,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_55_{mode='full', shape1=(5,), shape2=()}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_56_{mode='full', shape1=(5,), shape2=(5,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_57_{mode='full', shape1=(5,), shape2=(6,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_58_{mode='full', shape1=(5,), shape2=(20,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_59_{mode='full', shape1=(5,), shape2=(21,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_60_{mode='full', shape1=(6,), shape2=()}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_61_{mode='full', shape1=(6,), shape2=(5,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_62_{mode='full', shape1=(6,), shape2=(6,)}] 
-tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_63_{mode='full', shape1=(6,), shape2=(20,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_64_{mode='full', shape1=(6,), shape2=(21,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_65_{mode='full', shape1=(20,), shape2=()}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_66_{mode='full', shape1=(20,), shape2=(5,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_67_{mode='full', shape1=(20,), shape2=(6,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_68_{mode='full', shape1=(20,), shape2=(20,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_69_{mode='full', shape1=(20,), shape2=(21,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_70_{mode='full', shape1=(21,), shape2=()}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_71_{mode='full', shape1=(21,), shape2=(5,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_72_{mode='full', shape1=(21,), shape2=(6,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_73_{mode='full', shape1=(21,), shape2=(20,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_74_{mode='full', shape1=(21,), shape2=(21,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolve::test_convolve_non_contiguous[valid] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolve::test_convolve_non_contiguous[same] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolve::test_convolve_non_contiguous[full] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolve::test_convolve_large_non_contiguous[valid] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolve::test_convolve_large_non_contiguous[same] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolve::test_convolve_large_non_contiguous[full] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolve::test_convolve_diff_types[valid] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolve::test_convolve_diff_types[same] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolve::test_convolve_diff_types[full] - tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_fix tests/third_party/cupy/math_tests/test_trigonometric.py::TestUnwrap::test_unwrap_1dim_with_discont diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index e14b954abe63..45b41f2dafbc 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -310,12 +310,6 @@ tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_nan_to_num_inf_ar tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_nan_to_num_broadcast[nan] tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_nan_to_num_broadcast[posinf] tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_nan_to_num_broadcast[neginf] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveInvalid::test_convolve_empty[_param_0_{mode='valid'}] 
-tests/third_party/cupy/math_tests/test_misc.py::TestConvolveInvalid::test_convolve_empty[_param_1_{mode='same'}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveInvalid::test_convolve_empty[_param_2_{mode='full'}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveInvalid::test_convolve_ndim[_param_0_{mode='valid'}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveInvalid::test_convolve_ndim[_param_1_{mode='same'}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveInvalid::test_convolve_ndim[_param_2_{mode='full'}] tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_nan_to_num_scalar_nan tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_nan_to_num_copy @@ -341,90 +335,6 @@ tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_interp_size1 tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_interp_inf_to_nan tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_heaviside tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_heaviside_nan_inf -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_0_{mode='valid', shape1=(), shape2=()}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_1_{mode='valid', shape1=(), shape2=(5,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_2_{mode='valid', shape1=(), shape2=(6,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_3_{mode='valid', shape1=(), shape2=(20,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_4_{mode='valid', shape1=(), shape2=(21,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_5_{mode='valid', shape1=(5,), shape2=()}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_6_{mode='valid', shape1=(5,), shape2=(5,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_7_{mode='valid', shape1=(5,), shape2=(6,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_8_{mode='valid', shape1=(5,), shape2=(20,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_9_{mode='valid', shape1=(5,), shape2=(21,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_10_{mode='valid', shape1=(6,), shape2=()}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_11_{mode='valid', shape1=(6,), shape2=(5,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_12_{mode='valid', shape1=(6,), shape2=(6,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_13_{mode='valid', shape1=(6,), shape2=(20,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_14_{mode='valid', shape1=(6,), shape2=(21,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_15_{mode='valid', shape1=(20,), shape2=()}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_16_{mode='valid', shape1=(20,), shape2=(5,)}] 
-tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_17_{mode='valid', shape1=(20,), shape2=(6,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_18_{mode='valid', shape1=(20,), shape2=(20,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_19_{mode='valid', shape1=(20,), shape2=(21,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_20_{mode='valid', shape1=(21,), shape2=()}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_21_{mode='valid', shape1=(21,), shape2=(5,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_22_{mode='valid', shape1=(21,), shape2=(6,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_23_{mode='valid', shape1=(21,), shape2=(20,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_24_{mode='valid', shape1=(21,), shape2=(21,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_25_{mode='same', shape1=(), shape2=()}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_26_{mode='same', shape1=(), shape2=(5,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_27_{mode='same', shape1=(), shape2=(6,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_28_{mode='same', shape1=(), shape2=(20,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_29_{mode='same', shape1=(), shape2=(21,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_30_{mode='same', shape1=(5,), shape2=()}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_31_{mode='same', shape1=(5,), shape2=(5,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_32_{mode='same', shape1=(5,), shape2=(6,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_33_{mode='same', shape1=(5,), shape2=(20,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_34_{mode='same', shape1=(5,), shape2=(21,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_35_{mode='same', shape1=(6,), shape2=()}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_36_{mode='same', shape1=(6,), shape2=(5,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_37_{mode='same', shape1=(6,), shape2=(6,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_38_{mode='same', shape1=(6,), shape2=(20,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_39_{mode='same', shape1=(6,), shape2=(21,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_40_{mode='same', shape1=(20,), shape2=()}] 
-tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_41_{mode='same', shape1=(20,), shape2=(5,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_42_{mode='same', shape1=(20,), shape2=(6,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_43_{mode='same', shape1=(20,), shape2=(20,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_44_{mode='same', shape1=(20,), shape2=(21,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_45_{mode='same', shape1=(21,), shape2=()}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_46_{mode='same', shape1=(21,), shape2=(5,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_47_{mode='same', shape1=(21,), shape2=(6,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_48_{mode='same', shape1=(21,), shape2=(20,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_49_{mode='same', shape1=(21,), shape2=(21,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_50_{mode='full', shape1=(), shape2=()}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_51_{mode='full', shape1=(), shape2=(5,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_52_{mode='full', shape1=(), shape2=(6,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_53_{mode='full', shape1=(), shape2=(20,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_54_{mode='full', shape1=(), shape2=(21,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_55_{mode='full', shape1=(5,), shape2=()}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_56_{mode='full', shape1=(5,), shape2=(5,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_57_{mode='full', shape1=(5,), shape2=(6,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_58_{mode='full', shape1=(5,), shape2=(20,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_59_{mode='full', shape1=(5,), shape2=(21,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_60_{mode='full', shape1=(6,), shape2=()}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_61_{mode='full', shape1=(6,), shape2=(5,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_62_{mode='full', shape1=(6,), shape2=(6,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_63_{mode='full', shape1=(6,), shape2=(20,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_64_{mode='full', shape1=(6,), shape2=(21,)}] 
-tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_65_{mode='full', shape1=(20,), shape2=()}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_66_{mode='full', shape1=(20,), shape2=(5,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_67_{mode='full', shape1=(20,), shape2=(6,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_68_{mode='full', shape1=(20,), shape2=(20,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_69_{mode='full', shape1=(20,), shape2=(21,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_70_{mode='full', shape1=(21,), shape2=()}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_71_{mode='full', shape1=(21,), shape2=(5,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_72_{mode='full', shape1=(21,), shape2=(6,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_73_{mode='full', shape1=(21,), shape2=(20,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolveShapeCombination::test_convolve[_param_74_{mode='full', shape1=(21,), shape2=(21,)}] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolve::test_convolve_non_contiguous[valid] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolve::test_convolve_non_contiguous[same] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolve::test_convolve_non_contiguous[full] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolve::test_convolve_large_non_contiguous[valid] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolve::test_convolve_large_non_contiguous[same] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolve::test_convolve_large_non_contiguous[full] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolve::test_convolve_diff_types[valid] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolve::test_convolve_diff_types[same] -tests/third_party/cupy/math_tests/test_misc.py::TestConvolve::test_convolve_diff_types[full] tests/third_party/cupy/math_tests/test_rounding.py::TestRounding::test_fix diff --git a/tests/skipped_tests_gpu_no_fp64.tbl b/tests/skipped_tests_gpu_no_fp64.tbl index c209c876df6b..44e4c856b773 100644 --- a/tests/skipped_tests_gpu_no_fp64.tbl +++ b/tests/skipped_tests_gpu_no_fp64.tbl @@ -1,12 +1,5 @@ -tests/test_strides.py::test_strides_1arg[(10,)-int32-fabs] -tests/test_strides.py::test_strides_1arg[(10,)-int64-fabs] -tests/test_strides.py::test_strides_1arg[(10,)-None-fabs] - tests/test_umath.py::test_umaths[('floor_divide', 'ff')] -tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_fabs -tests/third_party/cupy/math_tests/test_misc.py::TestMisc::test_fabs_negative - tests/third_party/cupy/math_tests/test_trigonometric.py::TestUnwrap::test_unwrap_1dim tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsBeta_param_6_{a_shape=(3, 2), b_shape=(3, 2), shape=(4, 3, 2)}::test_beta diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py index f66017ea6e26..2420f7d9c2f1 100644 --- a/tests/test_usm_type.py +++ b/tests/test_usm_type.py @@ -538,6 +538,7 @@ def test_norm(usm_type, ord, axis): pytest.param("exp", [1.0, 2.0, 4.0, 7.0]), pytest.param("exp2", [0.0, 
1.0, 2.0]), pytest.param("expm1", [1.0e-10, 1.0, 2.0, 4.0, 7.0]), + pytest.param("fabs", [-1.2, 1.2]), pytest.param("floor", [-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]), pytest.param("gradient", [1, 2, 4, 7, 11, 16]), pytest.param("histogram_bin_edges", [0, 0, 0, 1, 2, 3, 3, 4, 5]), diff --git a/tests/third_party/cupy/math_tests/test_misc.py b/tests/third_party/cupy/math_tests/test_misc.py index dd7fe9dcc1aa..62717803aca3 100644 --- a/tests/third_party/cupy/math_tests/test_misc.py +++ b/tests/third_party/cupy/math_tests/test_misc.py @@ -26,6 +26,7 @@ def check_binary(self, name, xp, dtype, no_bool=False): @testing.for_dtypes(["?", "b", "h", "i", "q", "e", "f", "d", "F", "D"]) @testing.numpy_cupy_allclose(atol=1e-5) + # TODO: remove no_comlex=True, once adopted to numpy 2.0 def check_unary_negative( self, name, xp, dtype, no_bool=False, no_complex=False ): @@ -184,13 +185,13 @@ def test_absolute_negative(self): self.check_unary_negative("absolute") @testing.for_all_dtypes(no_complex=True) - @testing.numpy_cupy_allclose(atol=1e-5) + @testing.numpy_cupy_allclose(atol=1e-5, type_check=has_support_aspect64()) def test_fabs(self, xp, dtype): a = xp.array([2, 3, 4], dtype=dtype) return xp.fabs(a) @testing.for_all_dtypes(no_complex=True) - @testing.numpy_cupy_allclose(atol=1e-5) + @testing.numpy_cupy_allclose(atol=1e-5, type_check=has_support_aspect64()) def test_fabs_negative(self, xp, dtype): a = xp.array([-2.0, -4.0, 0.0, 4.0], dtype=dtype) return xp.fabs(a) @@ -198,7 +199,7 @@ def test_fabs_negative(self, xp, dtype): def test_sign(self): self.check_unary("sign", no_bool=True) - # TODO: remove no_comlex=True, when numpy 2.0.0 will release + # TODO: remove no_comlex=True, once adopted to numpy 2.0 def test_sign_negative(self): self.check_unary_negative("sign", no_bool=True, no_complex=True) @@ -504,6 +505,7 @@ def test_heaviside_nan_inf(self, xp, dtype_1, dtype_2): } ) ) +@pytest.mark.skip("convolve() is not implemented yet") class TestConvolveShapeCombination: @testing.for_all_dtypes(no_float16=True) @testing.numpy_cupy_allclose(rtol=1e-3) @@ -513,6 +515,7 @@ def test_convolve(self, xp, dtype): return xp.convolve(a, b, mode=self.mode) +@pytest.mark.skip("convolve() is not implemented yet") @pytest.mark.parametrize("mode", ["valid", "same", "full"]) class TestConvolve: @testing.for_all_dtypes(no_float16=True) @@ -537,6 +540,7 @@ def test_convolve_diff_types(self, xp, dtype1, dtype2, mode): return xp.convolve(a, b, mode=mode) +@pytest.mark.skip("convolve() is not implemented yet") @testing.parameterize(*testing.product({"mode": ["valid", "same", "full"]})) class TestConvolveInvalid: @testing.for_all_dtypes() From a7f2a0c342d60064af4b73ec6a9e43d30855afe2 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 12 Jun 2024 15:26:57 +0200 Subject: [PATCH 25/35] Decouple populate logic to a macro --- .../ufunc/elementwise_functions/fabs.cpp | 57 ++++---- .../ufunc/elementwise_functions/populate.hpp | 122 ++++++++++++++++++ .../kernels/elementwise_functions/fabs.hpp | 118 ----------------- 3 files changed, 157 insertions(+), 140 deletions(-) create mode 100644 dpnp/backend/extensions/ufunc/elementwise_functions/populate.hpp diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/fabs.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/fabs.cpp index 11c6a3ad065b..7588e1334732 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/fabs.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/fabs.cpp @@ -29,6 +29,7 @@ #include "fabs.hpp" #include 
"kernels/elementwise_functions/fabs.hpp" +#include "populate.hpp" // include a local copy of elementwise common header from dpctl tensor: // dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp @@ -52,7 +53,39 @@ using ew_cmn_ns::unary_strided_impl_fn_ptr_t; namespace impl { -namespace fabs_fn_ns = dpnp::kernels::fabs; +/** + * @brief A factory to define pairs of supported types for which + * sycl::fabs function is available. + * + * @tparam T Type of input vector `a` and of result vector `y`. + */ +template +struct OutputType +{ + using value_type = + typename std::disjunction, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +using dpnp::kernels::fabs::FabsFunctor; + +template +using ContigFunctor = ew_cmn_ns::UnaryContigFunctor, + vec_sz, + n_vecs, + enable_sg_loadstore>; + +template +using StridedFunctor = ew_cmn_ns:: + UnaryStridedFunctor>; using ew_cmn_ns::unary_contig_impl_fn_ptr_t; using ew_cmn_ns::unary_strided_impl_fn_ptr_t; @@ -62,27 +95,7 @@ static int fabs_output_typeid_vector[td_ns::num_types]; static unary_strided_impl_fn_ptr_t fabs_strided_dispatch_vector[td_ns::num_types]; -void populate_fabs_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = fabs_fn_ns; - - using fn_ns::FabsContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(fabs_contig_dispatch_vector); - - using fn_ns::FabsStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(fabs_strided_dispatch_vector); - - using fn_ns::FabsTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(fabs_output_typeid_vector); -}; +MACRO_POPULATE_DISPATCH_VECTORS(fabs); } // namespace impl void init_fabs(py::module_ m) diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/populate.hpp b/dpnp/backend/extensions/ufunc/elementwise_functions/populate.hpp new file mode 100644 index 000000000000..0783ef2717b2 --- /dev/null +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/populate.hpp @@ -0,0 +1,122 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#pragma once + +/** + * @brief A macro used to define factories and a populating universal functions. + */ +#define MACRO_POPULATE_DISPATCH_VECTORS(__name__) \ + template \ + class __name__##_contig_kernel; \ + \ + template \ + sycl::event __name__##_contig_impl( \ + sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, \ + const std::vector &depends = {}) \ + { \ + return ew_cmn_ns::unary_contig_impl( \ + exec_q, nelems, arg_p, res_p, depends); \ + } \ + \ + template \ + struct ContigFactory \ + { \ + fnT get() \ + { \ + if constexpr (std::is_same_v::value_type, \ + void>) { \ + fnT fn = nullptr; \ + return fn; \ + } \ + else { \ + fnT fn = __name__##_contig_impl; \ + return fn; \ + } \ + } \ + }; \ + \ + template \ + struct TypeMapFactory \ + { \ + std::enable_if_t::value, int> get() \ + { \ + using rT = typename OutputType::value_type; \ + return td_ns::GetTypeid{}.get(); \ + } \ + }; \ + \ + template \ + class __name__##_strided_kernel; \ + \ + template \ + sycl::event __name__##_strided_impl( \ + sycl::queue &exec_q, size_t nelems, int nd, \ + const ssize_t *shape_and_strides, const char *arg_p, \ + ssize_t arg_offset, char *res_p, ssize_t res_offset, \ + const std::vector &depends, \ + const std::vector &additional_depends) \ + { \ + return ew_cmn_ns::unary_strided_impl< \ + argTy, OutputType, StridedFunctor, __name__##_strided_kernel>( \ + exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p, \ + res_offset, depends, additional_depends); \ + } \ + \ + template \ + struct StridedFactory \ + { \ + fnT get() \ + { \ + if constexpr (std::is_same_v::value_type, \ + void>) { \ + fnT fn = nullptr; \ + return fn; \ + } \ + else { \ + fnT fn = __name__##_strided_impl; \ + return fn; \ + } \ + } \ + }; \ + \ + void populate_##__name__##_dispatch_vectors(void) \ + { \ + td_ns::DispatchVectorBuilder \ + dvb1; \ + dvb1.populate_dispatch_vector(__name__##_contig_dispatch_vector); \ + \ + td_ns::DispatchVectorBuilder \ + dvb2; \ + dvb2.populate_dispatch_vector(__name__##_strided_dispatch_vector); \ + \ + td_ns::DispatchVectorBuilder \ + dvb3; \ + dvb3.populate_dispatch_vector(__name__##_output_typeid_vector); \ + }; diff --git a/dpnp/backend/kernels/elementwise_functions/fabs.hpp b/dpnp/backend/kernels/elementwise_functions/fabs.hpp index 22072d5f945b..525cfc5bfe6e 100644 --- a/dpnp/backend/kernels/elementwise_functions/fabs.hpp +++ b/dpnp/backend/kernels/elementwise_functions/fabs.hpp @@ -27,15 +27,8 @@ #include -// dpctl tensor headers -#include "kernels/elementwise_functions/common.hpp" -#include "utils/type_dispatch.hpp" - namespace dpnp::kernels::fabs { -namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; -namespace td_ns = dpctl::tensor::type_dispatch; - template struct FabsFunctor { @@ -53,115 +46,4 @@ struct FabsFunctor return sycl::fabs(x); } }; - -template -using FabsContigFunctor = ew_cmn_ns::UnaryContigFunctor, - vec_sz, - n_vecs, - enable_sg_loadstore>; - -template -struct FabsOutputType -{ - using value_type = typename std::disjunction< // disjunction is C++17 - // feature, supported by DPC++ - td_ns::TypeMapResultEntry, - td_ns::TypeMapResultEntry, - td_ns::TypeMapResultEntry, - td_ns::DefaultResultEntry>::result_type; -}; - -template -class fabs_contig_kernel; - -template -sycl::event fabs_contig_impl(sycl::queue &exec_q, - size_t nelems, - const char *arg_p, - char *res_p, - const std::vector &depends = {}) -{ - return ew_cmn_ns::unary_contig_impl( - exec_q, 
nelems, arg_p, res_p, depends); -} - -template -struct FabsContigFactory -{ - fnT get() - { - if constexpr (std::is_same_v::value_type, - void>) { - fnT fn = nullptr; - return fn; - } - else { - fnT fn = fabs_contig_impl; - return fn; - } - } -}; - -template -struct FabsTypeMapFactory -{ - /** - * @brief get typeid for output type of fabs(T x) - */ - std::enable_if_t::value, int> get() - { - using rT = typename FabsOutputType::value_type; - return td_ns::GetTypeid{}.get(); - } -}; - -template -using FabsStridedFunctor = ew_cmn_ns:: - UnaryStridedFunctor>; - -template -class fabs_strided_kernel; - -template -sycl::event - fabs_strided_impl(sycl::queue &exec_q, - size_t nelems, - int nd, - const ssize_t *shape_and_strides, - const char *arg_p, - ssize_t arg_offset, - char *res_p, - ssize_t res_offset, - const std::vector &depends, - const std::vector &additional_depends) -{ - return ew_cmn_ns::unary_strided_impl< - argTy, FabsOutputType, FabsStridedFunctor, fabs_strided_kernel>( - exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p, - res_offset, depends, additional_depends); -} - -template -struct FabsStridedFactory -{ - fnT get() - { - if constexpr (std::is_same_v::value_type, - void>) { - fnT fn = nullptr; - return fn; - } - else { - fnT fn = fabs_strided_impl; - return fn; - } - } -}; } // namespace dpnp::kernels::fabs From 578d02476257512cd2ed64ca6c9b6c5a3ee33f4d Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 12 Jun 2024 16:42:08 +0200 Subject: [PATCH 26/35] Resolve compilation failure on Win --- .../extensions/ufunc/elementwise_functions/populate.hpp | 4 ++-- dpnp/backend/extensions/vm/add.cpp | 6 +++--- dpnp/backend/extensions/vm/atan2.cpp | 6 +++--- dpnp/backend/extensions/vm/div.cpp | 6 +++--- dpnp/backend/extensions/vm/hypot.cpp | 6 +++--- dpnp/backend/extensions/vm/mul.cpp | 6 +++--- dpnp/backend/extensions/vm/pow.cpp | 6 +++--- dpnp/backend/extensions/vm/sub.cpp | 6 +++--- 8 files changed, 23 insertions(+), 23 deletions(-) diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/populate.hpp b/dpnp/backend/extensions/ufunc/elementwise_functions/populate.hpp index 0783ef2717b2..6261fcc08eb6 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/populate.hpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/populate.hpp @@ -76,8 +76,8 @@ template \ sycl::event __name__##_strided_impl( \ sycl::queue &exec_q, size_t nelems, int nd, \ - const ssize_t *shape_and_strides, const char *arg_p, \ - ssize_t arg_offset, char *res_p, ssize_t res_offset, \ + const py::ssize_t *shape_and_strides, const char *arg_p, \ + py::ssize_t arg_offset, char *res_p, py::ssize_t res_offset, \ const std::vector &depends, \ const std::vector &additional_depends) \ { \ diff --git a/dpnp/backend/extensions/vm/add.cpp b/dpnp/backend/extensions/vm/add.cpp index c43f07bbcde1..c174bf73a99d 100644 --- a/dpnp/backend/extensions/vm/add.cpp +++ b/dpnp/backend/extensions/vm/add.cpp @@ -83,11 +83,11 @@ template static sycl::event add_contig_impl(sycl::queue &exec_q, std::size_t in_n, const char *in_a, - ssize_t a_offset, + py::ssize_t a_offset, const char *in_b, - ssize_t b_offset, + py::ssize_t b_offset, char *out_y, - ssize_t out_offset, + py::ssize_t out_offset, const std::vector &depends) { tu_ns::validate_type_for_device(exec_q); diff --git a/dpnp/backend/extensions/vm/atan2.cpp b/dpnp/backend/extensions/vm/atan2.cpp index 30bb59c9c422..4820a9623f0c 100644 --- a/dpnp/backend/extensions/vm/atan2.cpp +++ b/dpnp/backend/extensions/vm/atan2.cpp @@ -73,11 +73,11 @@ template 
static sycl::event atan2_contig_impl(sycl::queue &exec_q, std::size_t in_n, const char *in_a, - ssize_t a_offset, + py::ssize_t a_offset, const char *in_b, - ssize_t b_offset, + py::ssize_t b_offset, char *out_y, - ssize_t out_offset, + py::ssize_t out_offset, const std::vector &depends) { tu_ns::validate_type_for_device(exec_q); diff --git a/dpnp/backend/extensions/vm/div.cpp b/dpnp/backend/extensions/vm/div.cpp index 8cdb547feb4e..5fb7122a76c2 100644 --- a/dpnp/backend/extensions/vm/div.cpp +++ b/dpnp/backend/extensions/vm/div.cpp @@ -83,11 +83,11 @@ template static sycl::event div_contig_impl(sycl::queue &exec_q, std::size_t in_n, const char *in_a, - ssize_t a_offset, + py::ssize_t a_offset, const char *in_b, - ssize_t b_offset, + py::ssize_t b_offset, char *out_y, - ssize_t out_offset, + py::ssize_t out_offset, const std::vector &depends) { tu_ns::validate_type_for_device(exec_q); diff --git a/dpnp/backend/extensions/vm/hypot.cpp b/dpnp/backend/extensions/vm/hypot.cpp index 42dd81271111..50ca178c37c3 100644 --- a/dpnp/backend/extensions/vm/hypot.cpp +++ b/dpnp/backend/extensions/vm/hypot.cpp @@ -73,11 +73,11 @@ template static sycl::event hypot_contig_impl(sycl::queue &exec_q, std::size_t in_n, const char *in_a, - ssize_t a_offset, + py::ssize_t a_offset, const char *in_b, - ssize_t b_offset, + py::ssize_t b_offset, char *out_y, - ssize_t out_offset, + py::ssize_t out_offset, const std::vector &depends) { tu_ns::validate_type_for_device(exec_q); diff --git a/dpnp/backend/extensions/vm/mul.cpp b/dpnp/backend/extensions/vm/mul.cpp index 34007fbc07c2..de59d087f516 100644 --- a/dpnp/backend/extensions/vm/mul.cpp +++ b/dpnp/backend/extensions/vm/mul.cpp @@ -83,11 +83,11 @@ template static sycl::event mul_contig_impl(sycl::queue &exec_q, std::size_t in_n, const char *in_a, - ssize_t a_offset, + py::ssize_t a_offset, const char *in_b, - ssize_t b_offset, + py::ssize_t b_offset, char *out_y, - ssize_t out_offset, + py::ssize_t out_offset, const std::vector &depends) { tu_ns::validate_type_for_device(exec_q); diff --git a/dpnp/backend/extensions/vm/pow.cpp b/dpnp/backend/extensions/vm/pow.cpp index 65acd2ece44b..491b86f79469 100644 --- a/dpnp/backend/extensions/vm/pow.cpp +++ b/dpnp/backend/extensions/vm/pow.cpp @@ -83,11 +83,11 @@ template static sycl::event pow_contig_impl(sycl::queue &exec_q, std::size_t in_n, const char *in_a, - ssize_t a_offset, + py::ssize_t a_offset, const char *in_b, - ssize_t b_offset, + py::ssize_t b_offset, char *out_y, - ssize_t out_offset, + py::ssize_t out_offset, const std::vector &depends) { tu_ns::validate_type_for_device(exec_q); diff --git a/dpnp/backend/extensions/vm/sub.cpp b/dpnp/backend/extensions/vm/sub.cpp index 4ec1bdc36b50..8bfc477bfa79 100644 --- a/dpnp/backend/extensions/vm/sub.cpp +++ b/dpnp/backend/extensions/vm/sub.cpp @@ -83,11 +83,11 @@ template static sycl::event sub_contig_impl(sycl::queue &exec_q, std::size_t in_n, const char *in_a, - ssize_t a_offset, + py::ssize_t a_offset, const char *in_b, - ssize_t b_offset, + py::ssize_t b_offset, char *out_y, - ssize_t out_offset, + py::ssize_t out_offset, const std::vector &depends) { tu_ns::validate_type_for_device(exec_q); From 9666e6483bef4cf3774738f5eb5be0416240faca Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 13 Jun 2024 20:03:21 +0200 Subject: [PATCH 27/35] Implement dpnp.fmod function --- dpnp/backend/extensions/ufunc/CMakeLists.txt | 1 + .../ufunc/elementwise_functions/common.cpp | 2 + .../ufunc/elementwise_functions/fmod.cpp | 192 ++++++++++++++++++ 
.../ufunc/elementwise_functions/fmod.hpp | 35 ++++ .../ufunc/elementwise_functions/populate.hpp | 110 +++++++++- .../kernels/elementwise_functions/fmod.hpp | 65 ++++++ 6 files changed, 404 insertions(+), 1 deletion(-) create mode 100644 dpnp/backend/extensions/ufunc/elementwise_functions/fmod.cpp create mode 100644 dpnp/backend/extensions/ufunc/elementwise_functions/fmod.hpp create mode 100644 dpnp/backend/kernels/elementwise_functions/fmod.hpp diff --git a/dpnp/backend/extensions/ufunc/CMakeLists.txt b/dpnp/backend/extensions/ufunc/CMakeLists.txt index 7f9a240271b1..1d140b066584 100644 --- a/dpnp/backend/extensions/ufunc/CMakeLists.txt +++ b/dpnp/backend/extensions/ufunc/CMakeLists.txt @@ -26,6 +26,7 @@ set(_elementwise_sources ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/common.cpp ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/fabs.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/fmod.cpp ) set(python_module_name _ufunc_impl) diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp index 44173fc764fe..b915f9a299a8 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp @@ -26,6 +26,7 @@ #include #include "fabs.hpp" +#include "fmod.hpp" namespace py = pybind11; @@ -37,5 +38,6 @@ namespace dpnp::extensions::ufunc void init_elementwise_functions(py::module_ m) { init_fabs(m); + init_fmod(m); } } // namespace dpnp::extensions::ufunc diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/fmod.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/fmod.cpp new file mode 100644 index 000000000000..d156523d429f --- /dev/null +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/fmod.cpp @@ -0,0 +1,192 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#include + +#include "dpctl4pybind11.hpp" + +#include "fmod.hpp" +#include "kernels/elementwise_functions/fmod.hpp" +#include "populate.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" + +namespace py = pybind11; + +namespace dpnp::extensions::ufunc +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; + +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +namespace impl +{ +/** + * @brief A factory to define pairs of supported types for which + * sycl::fmod function is available. + * + * @tparam T1 Type of input vectors `a` + * @tparam T2 Type of input vectors `b` + */ +template +struct OutputType +{ + using value_type = typename std::disjunction< + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +using dpnp::kernels::fmod::FmodFunctor; + +template +using ContigFunctor = + ew_cmn_ns::BinaryContigFunctor, + vec_sz, + n_vecs, + enable_sg_loadstore>; + +template +using StridedFunctor = + ew_cmn_ns::BinaryStridedFunctor>; + +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +static binary_contig_impl_fn_ptr_t fmod_contig_dispatch_table[td_ns::num_types] + [td_ns::num_types]; +static int fmod_output_typeid_table[td_ns::num_types][td_ns::num_types]; +static binary_strided_impl_fn_ptr_t + fmod_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +MACRO_POPULATE_DISPATCH_TABLES(fmod); +} // namespace impl + +void init_fmod(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_fmod_dispatch_tables(); + using impl::fmod_contig_dispatch_table; + using impl::fmod_output_typeid_table; + using impl::fmod_strided_dispatch_table; + + auto fmod_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_int::py_binary_ufunc( + src1, src2, dst, exec_q, depends, fmod_output_typeid_table, + fmod_contig_dispatch_table, fmod_strided_dispatch_table, + // no support of C-contig row with broadcasting in OneMKL + td_ns::NullPtrTable< + impl:: + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + td_ns::NullPtrTable< + impl:: + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_fmod", fmod_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + + auto fmod_result_type_pyapi = [&](const py::dtype &dtype1, + 
const py::dtype &dtype2) { + return py_int::py_binary_ufunc_result_type( + dtype1, dtype2, fmod_output_typeid_table); + }; + m.def("_fmod_result_type", fmod_result_type_pyapi); + } +} +} // namespace dpnp::extensions::ufunc diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/fmod.hpp b/dpnp/backend/extensions/ufunc/elementwise_functions/fmod.hpp new file mode 100644 index 000000000000..cfc61ba218f8 --- /dev/null +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/fmod.hpp @@ -0,0 +1,35 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include + +namespace py = pybind11; + +namespace dpnp::extensions::ufunc +{ +void init_fmod(py::module_ m); +} // namespace dpnp::extensions::ufunc diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/populate.hpp b/dpnp/backend/extensions/ufunc/elementwise_functions/populate.hpp index 6261fcc08eb6..9470a41b2ce3 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/populate.hpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/populate.hpp @@ -26,7 +26,8 @@ #pragma once /** - * @brief A macro used to define factories and a populating universal functions. + * @brief A macro used to define factories and a populating unary universal + * functions. 
*/ #define MACRO_POPULATE_DISPATCH_VECTORS(__name__) \ template \ + class __name__##_contig_kernel; \ + \ + template \ + sycl::event __name__##_contig_impl( \ + sycl::queue &exec_q, size_t nelems, const char *arg1_p, \ + py::ssize_t arg1_offset, const char *arg2_p, py::ssize_t arg2_offset, \ + char *res_p, py::ssize_t res_offset, \ + const std::vector &depends = {}) \ + { \ + return ew_cmn_ns::binary_contig_impl( \ + exec_q, nelems, arg1_p, arg1_offset, arg2_p, arg2_offset, res_p, \ + res_offset, depends); \ + } \ + \ + template \ + struct ContigFactory \ + { \ + fnT get() \ + { \ + if constexpr (std::is_same_v< \ + typename OutputType::value_type, void>) \ + { \ + \ + fnT fn = nullptr; \ + return fn; \ + } \ + else { \ + fnT fn = __name__##_contig_impl; \ + return fn; \ + } \ + } \ + }; \ + \ + template \ + struct TypeMapFactory \ + { \ + std::enable_if_t::value, int> get() \ + { \ + using rT = typename OutputType::value_type; \ + return td_ns::GetTypeid{}.get(); \ + } \ + }; \ + \ + template \ + class __name__##_strided_kernel; \ + \ + template \ + sycl::event __name__##_strided_impl( \ + sycl::queue &exec_q, size_t nelems, int nd, \ + const ssize_t *shape_and_strides, const char *arg1_p, \ + py::ssize_t arg1_offset, const char *arg2_p, py::ssize_t arg2_offset, \ + char *res_p, py::ssize_t res_offset, \ + const std::vector &depends, \ + const std::vector &additional_depends) \ + { \ + return ew_cmn_ns::binary_strided_impl( \ + exec_q, nelems, nd, shape_and_strides, arg1_p, arg1_offset, \ + arg2_p, arg2_offset, res_p, res_offset, depends, \ + additional_depends); \ + } \ + \ + template \ + struct StridedFactory \ + { \ + fnT get() \ + { \ + if constexpr (std::is_same_v< \ + typename OutputType::value_type, void>) \ + { \ + fnT fn = nullptr; \ + return fn; \ + } \ + else { \ + fnT fn = __name__##_strided_impl; \ + return fn; \ + } \ + } \ + }; \ + \ + void populate_##__name__##_dispatch_tables(void) \ + { \ + td_ns::DispatchTableBuilder \ + dvb1; \ + dvb1.populate_dispatch_table(__name__##_contig_dispatch_table); \ + \ + td_ns::DispatchTableBuilder \ + dvb2; \ + dvb2.populate_dispatch_table(__name__##_strided_dispatch_table); \ + \ + td_ns::DispatchTableBuilder \ + dvb3; \ + dvb3.populate_dispatch_table(__name__##_output_typeid_table); \ + }; diff --git a/dpnp/backend/kernels/elementwise_functions/fmod.hpp b/dpnp/backend/kernels/elementwise_functions/fmod.hpp new file mode 100644 index 000000000000..49333fa6f60b --- /dev/null +++ b/dpnp/backend/kernels/elementwise_functions/fmod.hpp @@ -0,0 +1,65 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include + +namespace dpnp::kernels::fmod +{ +template +struct FmodFunctor +{ + // using supports_sg_loadstore = std::negation< + // std::disjunction, + // tu_ns::is_complex>>; + // using supports_vec = std::negation< + // std::disjunction, + // tu_ns::is_complex>>; + + using supports_sg_loadstore = typename std::false_type; + using supports_vec = typename std::false_type; + + resT operator()(const argT1 &in1, const argT2 &in2) const + { + if constexpr (std::is_integral::value && + std::is_integral::value) { + if (in2 == argT2(0)) { + return resT(0); + } + return in1 % in2; + } + else if constexpr (std::is_integral::value) { + return sycl::fmod(argT2(in1), in2); + } + else if constexpr (std::is_integral::value) { + return sycl::fmod(in1, argT1(in2)); + } + else { + return sycl::fmod(in1, in2); + } + } +}; +} // namespace dpnp::kernels::fmod From 7d5147b7f17bee72f84495928a33fedf590ba018 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 13 Jun 2024 21:53:13 +0200 Subject: [PATCH 28/35] Add vector implementation and dedicated kernel for boolean inputs --- .../ufunc/elementwise_functions/fmod.cpp | 1 + .../kernels/elementwise_functions/fmod.hpp | 26 +- dpnp/dpnp_iface_mathematical.py | 229 +++++++++--------- 3 files changed, 131 insertions(+), 125 deletions(-) diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/fmod.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/fmod.cpp index d156523d429f..95c7d4aefc46 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/fmod.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/fmod.cpp @@ -64,6 +64,7 @@ template struct OutputType { using value_type = typename std::disjunction< + td_ns::BinaryTypeMapResultEntry, td_ns::BinaryTypeMapResultEntry struct FmodFunctor { - // using supports_sg_loadstore = std::negation< - // std::disjunction, - // tu_ns::is_complex>>; - // using supports_vec = std::negation< - // std::disjunction, - // tu_ns::is_complex>>; - - using supports_sg_loadstore = typename std::false_type; - using supports_vec = typename std::false_type; + using supports_sg_loadstore = typename std::true_type; + using supports_vec = std::negation< + std::conjunction, std::is_integral>>; resT operator()(const argT1 &in1, const argT2 &in2) const { @@ -51,15 +45,17 @@ struct FmodFunctor } return in1 % in2; } - else if constexpr (std::is_integral::value) { - return sycl::fmod(argT2(in1), in2); - } - else if constexpr (std::is_integral::value) { - return sycl::fmod(in1, argT1(in2)); - } else { return sycl::fmod(in1, in2); } } + + template + sycl::vec + operator()(const sycl::vec &in1, + const sycl::vec &in2) const + { + return sycl::fmod(in1, in2); + } }; } // namespace dpnp::kernels::fmod diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index 513abca394e5..6ada59426aa9 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py 
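The hunk below replaces the hand-written `fmod` wrapper with a `DPNPBinaryFunc` object wired to the `_fmod` and `_fmod_result_type` bindings registered in the previous patch. As a rough sketch of the behaviour the rebound function is expected to expose: the first three calls are taken from the docstring retired in this hunk, the final one is inferred from the zero-divisor branch of `FmodFunctor` above, and all of it assumes a dpnp build with this patch series applied.

import dpnp as np

a = np.array([-3, -2, -1, 1, 2, 3])

# fmod keeps the sign of the dividend, unlike the Python modulus operator.
np.fmod(a, 2)       # array([-1,  0, -1,  1,  0,  1])
np.remainder(a, 2)  # array([ 1,  0,  1,  1,  0,  1])

# Mixed integer/floating operands give a floating-point result.
np.fmod(np.array([5, 3]), np.array([2, 2.0]))  # array([1., 1.])

# For integer inputs the kernel returns 0 when the divisor is 0
# instead of attempting the division.
np.fmod(a, 0)  # array([0, 0, 0, 0, 0, 0])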
@@ -1748,116 +1748,125 @@ def fmin(x1, x2, /, out=None, *, where=True, dtype=None, subok=True, **kwargs): ) -def fmod(x1, x2, /, out=None, *, where=True, dtype=None, subok=True, **kwargs): - """ - Returns the element-wise remainder of division. - - For full documentation refer to :obj:`numpy.fmod`. - - Returns - ------- - out : dpnp.ndarray - The remainder of the division of `x1` by `x2`. - - Limitations - ----------- - Parameters `x1` and `x2` are supported as either scalar, - :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`, but both `x1` - and `x2` can not be scalars at the same time. - Parameters `where`, `dtype` and `subok` are supported with their default - values. - Keyword argument `kwargs` is currently unsupported. - Otherwise the function will be executed sequentially on CPU. - Input array data types are limited by supported DPNP :ref:`Data types`. - - See Also - -------- - :obj:`dpnp.remainder` : Remainder complementary to floor_divide. - :obj:`dpnp.divide` : Standard division. - - Examples - -------- - >>> import dpnp as np - >>> a = np.array([-3, -2, -1, 1, 2, 3]) - >>> np.fmod(a, 2) - array([-1, 0, -1, 1, 0, 1]) - >>> np.remainder(a, 2) - array([1, 0, 1, 1, 0, 1]) - - >>> a = np.array([5, 3]) - >>> b = np.array([2, 2.]) - >>> np.fmod(a, b) - array([1., 1.]) - - >>> a = np.arange(-3, 3).reshape(3, 2) - >>> a - array([[-3, -2], - [-1, 0], - [ 1, 2]]) - >>> b = np.array([2, 2]) - >>> np.fmod(a, b) - array([[-1, 0], - [-1, 0], - [ 1, 0]]) - - """ - - if kwargs: - pass - elif where is not True: - pass - elif dtype is not None: - pass - elif subok is not True: - pass - elif dpnp.isscalar(x1) and dpnp.isscalar(x2): - # at least either x1 or x2 has to be an array - pass - else: - # get USM type and queue to copy scalar from the host memory into - # a USM allocation - usm_type, queue = ( - get_usm_allocations([x1, x2]) - if dpnp.isscalar(x1) or dpnp.isscalar(x2) - else (None, None) - ) - - x1_desc = dpnp.get_dpnp_descriptor( - x1, - copy_when_strides=False, - copy_when_nondefault_queue=False, - alloc_usm_type=usm_type, - alloc_queue=queue, - ) - x2_desc = dpnp.get_dpnp_descriptor( - x2, - copy_when_strides=False, - copy_when_nondefault_queue=False, - alloc_usm_type=usm_type, - alloc_queue=queue, - ) - if x1_desc and x2_desc: - if out is not None: - if not dpnp.is_supported_array_type(out): - raise TypeError( - "return array must be of supported array type" - ) - out_desc = ( - dpnp.get_dpnp_descriptor( - out, copy_when_nondefault_queue=False - ) - or None - ) - else: - out_desc = None - - return dpnp_fmod( - x1_desc, x2_desc, dtype=dtype, out=out_desc, where=where - ).get_pyobj() - - return call_origin( - numpy.fmod, x1, x2, dtype=dtype, out=out, where=where, **kwargs - ) +fmod = DPNPBinaryFunc( + "fmod", + ufi._fmod_result_type, + ufi._fmod, + "", + # mkl_fn_to_call=vmi._mkl_mul_to_call, + # mkl_impl_fn=vmi._mul, + # binary_inplace_fn=ti._multiply_inplace, +) +# def fmod(x1, x2, /, out=None, *, where=True, dtype=None, subok=True, **kwargs): +# """ +# Returns the element-wise remainder of division. + +# For full documentation refer to :obj:`numpy.fmod`. + +# Returns +# ------- +# out : dpnp.ndarray +# The remainder of the division of `x1` by `x2`. + +# Limitations +# ----------- +# Parameters `x1` and `x2` are supported as either scalar, +# :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`, but both `x1` +# and `x2` can not be scalars at the same time. +# Parameters `where`, `dtype` and `subok` are supported with their default +# values. 
+# Keyword argument `kwargs` is currently unsupported. +# Otherwise the function will be executed sequentially on CPU. +# Input array data types are limited by supported DPNP :ref:`Data types`. + +# See Also +# -------- +# :obj:`dpnp.remainder` : Remainder complementary to floor_divide. +# :obj:`dpnp.divide` : Standard division. + +# Examples +# -------- +# >>> import dpnp as np +# >>> a = np.array([-3, -2, -1, 1, 2, 3]) +# >>> np.fmod(a, 2) +# array([-1, 0, -1, 1, 0, 1]) +# >>> np.remainder(a, 2) +# array([1, 0, 1, 1, 0, 1]) + +# >>> a = np.array([5, 3]) +# >>> b = np.array([2, 2.]) +# >>> np.fmod(a, b) +# array([1., 1.]) + +# >>> a = np.arange(-3, 3).reshape(3, 2) +# >>> a +# array([[-3, -2], +# [-1, 0], +# [ 1, 2]]) +# >>> b = np.array([2, 2]) +# >>> np.fmod(a, b) +# array([[-1, 0], +# [-1, 0], +# [ 1, 0]]) + +# """ + +# if kwargs: +# pass +# elif where is not True: +# pass +# elif dtype is not None: +# pass +# elif subok is not True: +# pass +# elif dpnp.isscalar(x1) and dpnp.isscalar(x2): +# # at least either x1 or x2 has to be an array +# pass +# else: +# # get USM type and queue to copy scalar from the host memory into +# # a USM allocation +# usm_type, queue = ( +# get_usm_allocations([x1, x2]) +# if dpnp.isscalar(x1) or dpnp.isscalar(x2) +# else (None, None) +# ) + +# x1_desc = dpnp.get_dpnp_descriptor( +# x1, +# copy_when_strides=False, +# copy_when_nondefault_queue=False, +# alloc_usm_type=usm_type, +# alloc_queue=queue, +# ) +# x2_desc = dpnp.get_dpnp_descriptor( +# x2, +# copy_when_strides=False, +# copy_when_nondefault_queue=False, +# alloc_usm_type=usm_type, +# alloc_queue=queue, +# ) +# if x1_desc and x2_desc: +# if out is not None: +# if not dpnp.is_supported_array_type(out): +# raise TypeError( +# "return array must be of supported array type" +# ) +# out_desc = ( +# dpnp.get_dpnp_descriptor( +# out, copy_when_nondefault_queue=False +# ) +# or None +# ) +# else: +# out_desc = None + +# return dpnp_fmod( +# x1_desc, x2_desc, dtype=dtype, out=out_desc, where=where +# ).get_pyobj() + +# return call_origin( +# numpy.fmod, x1, x2, dtype=dtype, out=out, where=where, **kwargs +# ) def gradient(f, *varargs, axis=None, edge_order=1): From d8e190d11edfec85ad97fcda39a2860a7d26a492 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 13 Jun 2024 22:48:03 +0200 Subject: [PATCH 29/35] Update python implementation part --- dpnp/backend/include/dpnp_iface_fptr.hpp | 10 +- dpnp/backend/kernels/dpnp_krnl_elemwise.cpp | 14 -- dpnp/dpnp_algo/dpnp_algo.pxd | 1 - dpnp/dpnp_algo/dpnp_algo_mathematical.pxi | 9 - dpnp/dpnp_iface_mathematical.py | 178 +++++++------------- 5 files changed, 68 insertions(+), 144 deletions(-) diff --git a/dpnp/backend/include/dpnp_iface_fptr.hpp b/dpnp/backend/include/dpnp_iface_fptr.hpp index 0f6ef51bc7ce..1172bcbe4f5f 100644 --- a/dpnp/backend/include/dpnp_iface_fptr.hpp +++ b/dpnp/backend/include/dpnp_iface_fptr.hpp @@ -140,12 +140,10 @@ enum class DPNPFuncName : size_t DPNP_FN_FLOOR, /**< Used in numpy.floor() impl */ DPNP_FN_FLOOR_DIVIDE, /**< Used in numpy.floor_divide() impl */ DPNP_FN_FMOD, /**< Used in numpy.fmod() impl */ - DPNP_FN_FMOD_EXT, /**< Used in numpy.fmod() impl, requires extra parameters - */ - DPNP_FN_FULL, /**< Used in numpy.full() impl */ - DPNP_FN_FULL_LIKE, /**< Used in numpy.full_like() impl */ - DPNP_FN_HYPOT, /**< Used in numpy.hypot() impl */ - DPNP_FN_IDENTITY, /**< Used in numpy.identity() impl */ + DPNP_FN_FULL, /**< Used in numpy.full() impl */ + DPNP_FN_FULL_LIKE, /**< Used in numpy.full_like() impl */ + DPNP_FN_HYPOT, /**< Used 
in numpy.hypot() impl */ + DPNP_FN_IDENTITY, /**< Used in numpy.identity() impl */ DPNP_FN_INITVAL, /**< Used in numpy ones, ones_like, zeros, zeros_like impls */ DPNP_FN_INITVAL_EXT, /**< Used in numpy ones, ones_like, zeros, zeros_like diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp index 122a3ccdedd3..486851516dcf 100644 --- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp @@ -1401,20 +1401,6 @@ static void func_map_elemwise_2arg_3type_core(func_map_t &fmap) template static void func_map_elemwise_2arg_3type_short_core(func_map_t &fmap) { - ((fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][FT1][FTs] = - {get_floating_res_type(), - (void *) - dpnp_fmod_c_ext()>, - func_type_map_t::find_type, - func_type_map_t::find_type>, - get_floating_res_type(), - (void *)dpnp_fmod_c_ext< - func_type_map_t::find_type()>, - func_type_map_t::find_type, - func_type_map_t::find_type>}), - ...); ((fmap[DPNPFuncName::DPNP_FN_MAXIMUM_EXT][FT1][FTs] = {get_floating_res_type(), (void *)dpnp_maximum_c_ext< diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd index f6df42981a9f..4e91151697c0 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pxd +++ b/dpnp/dpnp_algo/dpnp_algo.pxd @@ -42,7 +42,6 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_ERF_EXT DPNP_FN_FFT_FFT_EXT DPNP_FN_FFT_RFFT_EXT - DPNP_FN_FMOD_EXT DPNP_FN_MAXIMUM_EXT DPNP_FN_MEDIAN_EXT DPNP_FN_MINIMUM_EXT diff --git a/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi b/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi index 405037da7829..fca1e6dc3036 100644 --- a/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi +++ b/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi @@ -37,7 +37,6 @@ and the rest of the library __all__ += [ "dpnp_ediff1d", - "dpnp_fmod", "dpnp_fmax", "dpnp_fmin", "dpnp_modf", @@ -109,14 +108,6 @@ cpdef utils.dpnp_descriptor dpnp_ediff1d(utils.dpnp_descriptor x1): return result -cpdef utils.dpnp_descriptor dpnp_fmod(utils.dpnp_descriptor x1_obj, - utils.dpnp_descriptor x2_obj, - object dtype=None, - utils.dpnp_descriptor out=None, - object where=True): - return call_fptr_2in_1out_strides(DPNP_FN_FMOD_EXT, x1_obj, x2_obj, dtype, out, where) - - cpdef utils.dpnp_descriptor dpnp_fmax(utils.dpnp_descriptor x1_obj, utils.dpnp_descriptor x2_obj, object dtype=None, diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index 6ada59426aa9..e8922a7e011d 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -63,7 +63,6 @@ dpnp_ediff1d, dpnp_fmax, dpnp_fmin, - dpnp_fmod, dpnp_modf, dpnp_trapz, ) @@ -1748,125 +1747,75 @@ def fmin(x1, x2, /, out=None, *, where=True, dtype=None, subok=True, **kwargs): ) +_FMOD_DOCSTRING = """ +Calculates the remainder of division for each element `x1_i` of the input array +`x1` with the respective element `x2_i` of the input array `x2`. + +This function is equivalent to the Matlab(TM) ``rem`` function and should not +be confused with the Python modulus operator ``x1 % x2``. + +For full documentation refer to :obj:`numpy.fmod`. + +Parameters +---------- +x1 : {dpnp.ndarray, usm_ndarray} + First input array, expected to have a real-valued data type. +x2 : {dpnp.ndarray, usm_ndarray} + Second input array, also expected to have a real-valued data type. +out : {None, dpnp.ndarray, usm_ndarray}, optional + Output array to populate. + Array must have the correct shape and the expected data type. 
+order : {"C", "F", "A", "K"}, optional + Memory layout of the newly output array, if parameter `out` is ``None``. + Default: ``"K"``. + +Returns +------- +out : dpnp.ndarray + An array containing the element-wise remainders. The data type of the + returned array is determined by the Type Promotion Rules. + +Limitations +---------- +Parameters `where` and `subok` are supported with their default values. +Keyword argument `kwargs` is currently unsupported. +Otherwise ``NotImplementedError`` exception will be raised. + +See Also +-------- +:obj:`dpnp.remainder` : Equivalent to the Python ``%`` operator. +:obj:`dpnp.divide` : Standard division. + +Examples +-------- +>>> import dpnp as np +>>> a = np.array([-3, -2, -1, 1, 2, 3]) +>>> np.fmod(a, 2) +array([-1, 0, -1, 1, 0, 1]) +>>> np.remainder(a, 2) +array([1, 0, 1, 1, 0, 1]) + +>>> np.fmod(np.array([5, 3]), np.array([2, 2.])) +array([1., 1.]) +>>> a = np.arange(-3, 3).reshape(3, 2) +>>> a +array([[-3, -2], + [-1, 0], + [ 1, 2]]) +>>> np.fmod(a, np.array([2, 2])) +array([[-1, 0], + [-1, 0], + [ 1, 0]]) +""" + fmod = DPNPBinaryFunc( "fmod", ufi._fmod_result_type, ufi._fmod, - "", + _FMOD_DOCSTRING, # mkl_fn_to_call=vmi._mkl_mul_to_call, # mkl_impl_fn=vmi._mul, - # binary_inplace_fn=ti._multiply_inplace, ) -# def fmod(x1, x2, /, out=None, *, where=True, dtype=None, subok=True, **kwargs): -# """ -# Returns the element-wise remainder of division. - -# For full documentation refer to :obj:`numpy.fmod`. - -# Returns -# ------- -# out : dpnp.ndarray -# The remainder of the division of `x1` by `x2`. - -# Limitations -# ----------- -# Parameters `x1` and `x2` are supported as either scalar, -# :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`, but both `x1` -# and `x2` can not be scalars at the same time. -# Parameters `where`, `dtype` and `subok` are supported with their default -# values. -# Keyword argument `kwargs` is currently unsupported. -# Otherwise the function will be executed sequentially on CPU. -# Input array data types are limited by supported DPNP :ref:`Data types`. - -# See Also -# -------- -# :obj:`dpnp.remainder` : Remainder complementary to floor_divide. -# :obj:`dpnp.divide` : Standard division. 
- -# Examples -# -------- -# >>> import dpnp as np -# >>> a = np.array([-3, -2, -1, 1, 2, 3]) -# >>> np.fmod(a, 2) -# array([-1, 0, -1, 1, 0, 1]) -# >>> np.remainder(a, 2) -# array([1, 0, 1, 1, 0, 1]) - -# >>> a = np.array([5, 3]) -# >>> b = np.array([2, 2.]) -# >>> np.fmod(a, b) -# array([1., 1.]) - -# >>> a = np.arange(-3, 3).reshape(3, 2) -# >>> a -# array([[-3, -2], -# [-1, 0], -# [ 1, 2]]) -# >>> b = np.array([2, 2]) -# >>> np.fmod(a, b) -# array([[-1, 0], -# [-1, 0], -# [ 1, 0]]) - -# """ - -# if kwargs: -# pass -# elif where is not True: -# pass -# elif dtype is not None: -# pass -# elif subok is not True: -# pass -# elif dpnp.isscalar(x1) and dpnp.isscalar(x2): -# # at least either x1 or x2 has to be an array -# pass -# else: -# # get USM type and queue to copy scalar from the host memory into -# # a USM allocation -# usm_type, queue = ( -# get_usm_allocations([x1, x2]) -# if dpnp.isscalar(x1) or dpnp.isscalar(x2) -# else (None, None) -# ) - -# x1_desc = dpnp.get_dpnp_descriptor( -# x1, -# copy_when_strides=False, -# copy_when_nondefault_queue=False, -# alloc_usm_type=usm_type, -# alloc_queue=queue, -# ) -# x2_desc = dpnp.get_dpnp_descriptor( -# x2, -# copy_when_strides=False, -# copy_when_nondefault_queue=False, -# alloc_usm_type=usm_type, -# alloc_queue=queue, -# ) -# if x1_desc and x2_desc: -# if out is not None: -# if not dpnp.is_supported_array_type(out): -# raise TypeError( -# "return array must be of supported array type" -# ) -# out_desc = ( -# dpnp.get_dpnp_descriptor( -# out, copy_when_nondefault_queue=False -# ) -# or None -# ) -# else: -# out_desc = None - -# return dpnp_fmod( -# x1_desc, x2_desc, dtype=dtype, out=out_desc, where=where -# ).get_pyobj() - -# return call_origin( -# numpy.fmod, x1, x2, dtype=dtype, out=out, where=where, **kwargs -# ) def gradient(f, *varargs, axis=None, edge_order=1): @@ -2815,7 +2764,7 @@ def prod( First input array, expected to have a real-valued data type. x2 : {dpnp.ndarray, usm_ndarray} Second input array, also expected to have a real-valued data type. -out : {None, dpnp.ndarray}, optional +out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. order : {"C", "F", "A", "K"}, optional @@ -2830,6 +2779,7 @@ def prod( array is determined by the Type Promotion Rules. Limitations +---------- Parameters `where` and `subok` are supported with their default values. Keyword argument `kwargs` is currently unsupported. Otherwise ``NotImplementedError`` exception will be raised. 
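The net effect of patches 28 and 29 is that `dpnp.fmod` is now backed by the `FmodFunctor` kernel and registered through `DPNPBinaryFunc` instead of the legacy `dpnp_fmod` descriptor path. A minimal, illustrative sketch of the resulting Python-level behaviour follows; it assumes a dpnp build that already contains these patches, and the expected outputs are derived from the `_FMOD_DOCSTRING` examples, the integral branch of `FmodFunctor`, and the new `bool`/`bool` entry in `OutputType`, not from an authoritative run.

    import dpnp as np

    a = np.array([-3, -2, -1, 1, 2, 3])

    # The sign of the result follows the dividend (fmod), not the divisor
    # (remainder) -- same values as in the docstring examples above.
    np.fmod(a, 2)        # array([-1,  0, -1,  1,  0,  1])
    np.remainder(a, 2)   # array([ 1,  0,  1,  1,  0,  1])

    # Integral branch of FmodFunctor: a zero divisor yields 0 for integer
    # inputs instead of raising.
    np.fmod(a, 0)        # array([0, 0, 0, 0, 0, 0])

    # Boolean inputs are dispatched through the new bool/bool -> int8 entry
    # added to OutputType in patch 28.
    np.fmod(np.array([True, False]), np.array([True, True]))
                         # array([0, 0], dtype=int8)

    # Mixed integer/floating inputs promote to a floating result type.
    np.fmod(np.array([5, 3]), np.array([2, 2.0]))   # array([1., 1.])

The same call sites are later routed to the OneMKL VM implementation when the inputs qualify; the pure Python semantics shown here are unchanged by that dispatch.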
From 7eced0fbad4301127264b8d126bb209b9db6cbb3 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 14 Jun 2024 12:58:50 +0200 Subject: [PATCH 30/35] Add MKL function to the VM extension --- dpnp/backend/extensions/vm/CMakeLists.txt | 1 + dpnp/backend/extensions/vm/fmod.cpp | 161 ++++++++++++++++++++++ dpnp/backend/extensions/vm/fmod.hpp | 35 +++++ dpnp/backend/extensions/vm/vm_py.cpp | 2 + dpnp/dpnp_iface_mathematical.py | 4 +- 5 files changed, 201 insertions(+), 2 deletions(-) create mode 100644 dpnp/backend/extensions/vm/fmod.cpp create mode 100644 dpnp/backend/extensions/vm/fmod.hpp diff --git a/dpnp/backend/extensions/vm/CMakeLists.txt b/dpnp/backend/extensions/vm/CMakeLists.txt index ba1e46ea0ed8..de6262581f59 100644 --- a/dpnp/backend/extensions/vm/CMakeLists.txt +++ b/dpnp/backend/extensions/vm/CMakeLists.txt @@ -43,6 +43,7 @@ set(_elementwise_sources ${CMAKE_CURRENT_SOURCE_DIR}/exp2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/expm1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/floor.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/fmod.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hypot.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ln.cpp ${CMAKE_CURRENT_SOURCE_DIR}/log10.cpp diff --git a/dpnp/backend/extensions/vm/fmod.cpp b/dpnp/backend/extensions/vm/fmod.cpp new file mode 100644 index 000000000000..e985492de047 --- /dev/null +++ b/dpnp/backend/extensions/vm/fmod.cpp @@ -0,0 +1,161 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#include +#include + +#include "dpctl4pybind11.hpp" + +#include "common.hpp" +#include "fmod.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../elementwise_functions/elementwise_functions.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::vm +{ +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; + +namespace impl +{ +// OneMKL namespace with VM functions +namespace mkl_vm = oneapi::mkl::vm; + +/** + * @brief A factory to define pairs of supported types for which + * MKL VM library provides support in oneapi::mkl::vm::fmod function. + * + * @tparam T Type of input vectors `a` and `b` and of result vector `y`. + */ +template +struct OutputType +{ + using value_type = typename std::disjunction< + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +static sycl::event fmod_contig_impl(sycl::queue &exec_q, + std::size_t in_n, + const char *in_a, + py::ssize_t a_offset, + const char *in_b, + py::ssize_t b_offset, + char *out_y, + py::ssize_t out_offset, + const std::vector &depends) +{ + tu_ns::validate_type_for_device(exec_q); + tu_ns::validate_type_for_device(exec_q); + + if ((a_offset != 0) || (b_offset != 0) || (out_offset != 0)) { + throw std::runtime_error("Arrays offsets have to be equals to 0"); + } + + std::int64_t n = static_cast(in_n); + const T1 *a = reinterpret_cast(in_a); + const T2 *b = reinterpret_cast(in_b); + + using resTy = typename OutputType::value_type; + resTy *y = reinterpret_cast(out_y); + + return mkl_vm::fmod(exec_q, + n, // number of elements to be calculated + a, // pointer `a` containing 1st input vector of size n + b, // pointer `b` containing 2nd input vector of size n + y, // pointer `y` to the output vector of size n + depends); +} + +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +static int output_typeid_vector[td_ns::num_types][td_ns::num_types]; +static binary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types] + [td_ns::num_types]; + +MACRO_POPULATE_DISPATCH_TABLES(fmod); +} // namespace impl + +void init_fmod(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + impl::populate_dispatch_tables(); + using impl::contig_dispatch_vector; + using impl::output_typeid_vector; + + auto fmod_pyapi = [&](sycl::queue &exec_q, const arrayT &src1, + const arrayT &src2, const arrayT &dst, + const event_vecT &depends = {}) { + return py_int::py_binary_ufunc( + src1, src2, dst, exec_q, depends, output_typeid_vector, + contig_dispatch_vector, + // no support of strided implementation in OneMKL + td_ns::NullPtrTable{}, + // no support of C-contig row with broadcasting in OneMKL + td_ns::NullPtrTable< + impl:: + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + 
td_ns::NullPtrTable< + impl:: + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_fmod", fmod_pyapi, + "Call `fmod` function from OneMKL VM library to performs element " + "by element computation of the modulus function of vector `src1` " + "with respect to vector `src2` to resulting vector `dst`", + py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("depends") = py::list()); + + auto fmod_need_to_call_pyapi = [&](sycl::queue &exec_q, const arrayT &src1, + const arrayT &src2, const arrayT &dst) { + return py_internal::need_to_call_binary_ufunc(exec_q, src1, src2, dst, + output_typeid_vector, + contig_dispatch_vector); + }; + m.def("_mkl_fmod_to_call", fmod_need_to_call_pyapi, + "Check input arguments to answer if `fmod` function from " + "OneMKL VM library can be used", + py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), + py::arg("dst")); +} +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/fmod.hpp b/dpnp/backend/extensions/vm/fmod.hpp new file mode 100644 index 000000000000..492ac8f98899 --- /dev/null +++ b/dpnp/backend/extensions/vm/fmod.hpp @@ -0,0 +1,35 @@ +//***************************************************************************** +// Copyright (c) 2023-2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#pragma once + +#include + +namespace py = pybind11; + +namespace dpnp::extensions::vm +{ +void init_fmod(py::module_ m); +} // namespace dpnp::extensions::vm diff --git a/dpnp/backend/extensions/vm/vm_py.cpp b/dpnp/backend/extensions/vm/vm_py.cpp index 791a8f6d6561..b78ae51ddc30 100644 --- a/dpnp/backend/extensions/vm/vm_py.cpp +++ b/dpnp/backend/extensions/vm/vm_py.cpp @@ -46,6 +46,7 @@ #include "exp2.hpp" #include "expm1.hpp" #include "floor.hpp" +#include "fmod.hpp" #include "hypot.hpp" #include "ln.hpp" #include "log10.hpp" @@ -86,6 +87,7 @@ PYBIND11_MODULE(_vm_impl, m) vm_ns::init_exp2(m); vm_ns::init_expm1(m); vm_ns::init_floor(m); + vm_ns::init_fmod(m); vm_ns::init_hypot(m); vm_ns::init_ln(m); vm_ns::init_log10(m); diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index e8922a7e011d..11d2ee92258f 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -1813,8 +1813,8 @@ def fmin(x1, x2, /, out=None, *, where=True, dtype=None, subok=True, **kwargs): ufi._fmod_result_type, ufi._fmod, _FMOD_DOCSTRING, - # mkl_fn_to_call=vmi._mkl_mul_to_call, - # mkl_impl_fn=vmi._mul, + mkl_fn_to_call=vmi._mkl_fmod_to_call, + mkl_impl_fn=vmi._fmod, ) From f4c972bad132b8623888f24d5a53fad8983150dd Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 14 Jun 2024 19:14:35 +0200 Subject: [PATCH 31/35] Add tests --- .../ufunc/elementwise_functions/fmod.cpp | 2 +- tests/test_mathematical.py | 50 +- tests/test_usm_type.py | 1 + .../cupy/core_tests/test_ndarray_math.py | 5 +- .../cupy/math_tests/test_arithmetic.py | 450 ++++++++++++++---- 5 files changed, 406 insertions(+), 102 deletions(-) diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/fmod.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/fmod.cpp index 95c7d4aefc46..dbc215ec1f40 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/fmod.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/fmod.cpp @@ -64,7 +64,7 @@ template struct OutputType { using value_type = typename std::disjunction< - td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, td_ns::BinaryTypeMapResultEntry>> numpy.fmod(numpy.array([3.9], dtype=numpy.float32), 0.3) @@ -1052,7 +1051,7 @@ def test_fmod(self, dtype, lhs, rhs): >>> numpy.fmod(numpy.array([3.9], dtype=numpy.float64), 0.3) array([9.53674318e-08]) - On a gpu without support for `float64`, dpnp produces results similar to the second one. + On a gpu without fp64 support, dpnp produces results similar to the second one. 
""" pytest.skip("Due to accuracy reason, the results are different.") self._test_mathematical("fmod", dtype, lhs, rhs, check_type=False) @@ -1299,6 +1298,51 @@ def test_positive_boolean(): dpnp.positive(dpnp_a) +@pytest.mark.parametrize("dtype", get_float_dtypes(no_float16=False)) +def test_float_remainder_magnitude(dtype): + b = numpy.array(1.0, dtype=dtype) + a = numpy.nextafter(numpy.array(0.0, dtype=dtype), -b) + + ia = dpnp.array(a) + ib = dpnp.array(b) + + result = dpnp.remainder(ia, ib) + expected = numpy.remainder(a, b) + assert_equal(result, expected) + + result = dpnp.remainder(-ia, -ib) + expected = numpy.remainder(-a, -b) + assert_equal(result, expected) + + +@pytest.mark.usefixtures("suppress_invalid_numpy_warnings") +@pytest.mark.parametrize("func", ["remainder", "fmod"]) +@pytest.mark.parametrize("dtype", get_float_dtypes(no_float16=False)) +@pytest.mark.parametrize( + "lhs, rhs", + [ + pytest.param(1.0, 0.0, id="one-zero"), + pytest.param(1.0, numpy.inf, id="one-inf"), + pytest.param(numpy.inf, 1.0, id="inf-one"), + pytest.param(numpy.inf, numpy.inf, id="inf-inf"), + pytest.param(numpy.inf, 0.0, id="inf-zero"), + pytest.param(1.0, numpy.nan, id="one-nan"), + pytest.param(numpy.nan, 0.0, id="nan-zero"), + pytest.param(numpy.nan, 1.0, id="nan-one"), + ], +) +def test_float_remainder_fmod_nans_inf(func, dtype, lhs, rhs): + a = numpy.array(lhs, dtype=dtype) + b = numpy.array(rhs, dtype=dtype) + + ia = dpnp.array(a) + ib = dpnp.array(b) + + result = getattr(dpnp, func)(ia, ib) + expected = getattr(numpy, func)(a, b) + assert_equal(result, expected) + + class TestProd: @pytest.mark.parametrize("axis", [None, 0, 1, -1, 2, -2, (1, 2), (0, -2)]) @pytest.mark.parametrize("keepdims", [False, True]) diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py index 2420f7d9c2f1..919b5ad9ad63 100644 --- a/tests/test_usm_type.py +++ b/tests/test_usm_type.py @@ -625,6 +625,7 @@ def test_1in_1out(func, data, usm_type): pytest.param("dot", [3 + 2j, 4 + 1j, 5], [1, 2 + 3j, 3]), pytest.param("fmax", [[0.0, 1.0, 2.0]], [[3.0, 4.0, 5.0]]), pytest.param("fmin", [[0.0, 1.0, 2.0]], [[3.0, 4.0, 5.0]]), + pytest.param("fmod", [5, 3], [2, 2.0]), pytest.param( "gradient", [1, 2, 4, 7, 11, 16], [0.0, 1.0, 1.5, 3.5, 4.0, 6.0] ), diff --git a/tests/third_party/cupy/core_tests/test_ndarray_math.py b/tests/third_party/cupy/core_tests/test_ndarray_math.py index 3233687789ab..713d1f1b51e6 100644 --- a/tests/third_party/cupy/core_tests/test_ndarray_math.py +++ b/tests/third_party/cupy/core_tests/test_ndarray_math.py @@ -3,7 +3,6 @@ import numpy import pytest -import dpnp as cupy from tests.helper import has_support_aspect64 from tests.third_party.cupy import testing @@ -87,7 +86,7 @@ def test_round_halfway_int(self, xp, dtype): a -= a.size + 1 scale = 10 ** abs(self.decimals) if self.decimals < 0: - a *= xp.array(scale, dtype=dtype) + a *= xp.array(scale).astype(dtype) a >>= 1 return a.round(self.decimals) @@ -105,7 +104,7 @@ def test_round_halfway_uint(self, xp, dtype): a -= 1 scale = 10 ** abs(self.decimals) if self.decimals < 0: - a *= xp.array(scale, dtype=dtype) + a *= xp.array(scale).astype(dtype) a >>= 1 return a.round(self.decimals) diff --git a/tests/third_party/cupy/math_tests/test_arithmetic.py b/tests/third_party/cupy/math_tests/test_arithmetic.py index 36593a2a99ef..8037c4f4002c 100644 --- a/tests/third_party/cupy/math_tests/test_arithmetic.py +++ b/tests/third_party/cupy/math_tests/test_arithmetic.py @@ -1,29 +1,28 @@ import itertools -import unittest import warnings import numpy import pytest 
import dpnp as cupy -from tests.helper import has_support_aspect64 +from tests.helper import has_support_aspect16, has_support_aspect64 from tests.third_party.cupy import testing -float_types = list(testing._loops._float_dtypes) -complex_types = [] -signed_int_types = [numpy.int32, numpy.int64] -unsigned_int_types = [] +float_types = [numpy.float16, numpy.float32, numpy.float64] +complex_types = [numpy.complex64, numpy.complex128] +signed_int_types = [numpy.int8, numpy.int16, numpy.int32, numpy.int64] +unsigned_int_types = [numpy.uint8, numpy.uint16, numpy.uint32, numpy.uint64] int_types = signed_int_types + unsigned_int_types -all_types = float_types + int_types + complex_types +all_types = [numpy.bool_] + float_types + int_types + complex_types +negative_types = [numpy.bool_] + float_types + signed_int_types + complex_types negative_types_wo_fp16 = ( [numpy.bool_] - + float_types + + [numpy.float32, numpy.float64] + [numpy.int16, numpy.int32, numpy.int64] + complex_types ) -negative_types = float_types + signed_int_types + complex_types -negative_no_complex_types = float_types + signed_int_types -no_complex_types = float_types + int_types +negative_no_complex_types = [numpy.bool_] + float_types + signed_int_types +no_complex_types = [numpy.bool_] + float_types + int_types @testing.parameterize( @@ -31,12 +30,7 @@ testing.product( { "nargs": [1], - "name": [ - "reciprocal", - "conj", - "conjugate", - "angle", - ], + "name": ["reciprocal", "conj", "conjugate", "angle"], } ) + testing.product( @@ -52,7 +46,6 @@ "floor_divide", "fmod", "remainder", - "mod", ], } ) @@ -128,14 +121,17 @@ class TestArithmeticUnary: @testing.numpy_cupy_allclose(atol=1e-5, type_check=has_support_aspect64()) def test_unary(self, xp): arg1 = self.arg1 - arg1 = xp.asarray(arg1) + if isinstance(arg1, numpy.ndarray): + arg1 = xp.asarray(arg1) if self.name in ("reciprocal") and xp is numpy: # In NumPy, for integer arguments with absolute value larger than 1 the result is always zero. # We need to convert the input data type to float then compare the output with DPNP. - if isinstance(arg1, numpy.ndarray) and numpy.issubdtype( - arg1.dtype, numpy.integer - ): + if arg1.dtype.char in "bB": # int8 + arg1 = xp.asarray(arg1, dtype=numpy.float16) + elif arg1.dtype.char in "hH": # int16 + arg1 = xp.asarray(arg1, dtype=numpy.float32) + elif arg1.dtype.char in "iIlL": # int32, int64 np_dtype = ( numpy.float64 if has_support_aspect64() else numpy.float32 ) @@ -143,32 +139,18 @@ def test_unary(self, xp): if self.name in {"angle"}: y = getattr(xp, self.name)(arg1, self.deg) - # In NumPy, for boolean arguments the output data type is always default floating data type. - # while data type of output in DPNP is determined by Type Promotion Rules. - if ( - isinstance(arg1, cupy.ndarray) - and cupy.issubdtype(arg1.dtype, cupy.bool) - and has_support_aspect64() - ): - y = y.astype(cupy.float64) + if isinstance(arg1, cupy.ndarray): + if arg1.dtype == cupy.bool and has_support_aspect64(): + # In NumPy, for boolean input the output data type is always default floating data type. + # while data type of output in DPNP is determined by Type Promotion Rules. + y = y.astype(cupy.float64) + elif arg1.dtype.char in "bBe" and has_support_aspect16(): + # In NumPy, for int8, uint8 and float16 inputs the output data type is always float16. + # while data type of output in DPNP is float32. 
+ y = y.astype(cupy.float16) else: y = getattr(xp, self.name)(arg1) - # if self.name in ("real", "imag"): - # Some NumPy functions return Python scalars for Python scalar - # inputs. - # We need to convert them to arrays to compare with CuPy outputs. - # if xp is numpy and isinstance(arg1, (bool, int, float, complex)): - # y = xp.asarray(y) - - # TODO(niboshi): Fix this - # numpy.real and numpy.imag return Python int if the input is - # Python bool. CuPy should return an array of dtype.int32 or - # dtype.int64 (depending on the platform) in such cases, instead - # of an array of dtype.bool. - # if xp is cupy and isinstance(arg1, bool): - # y = y.astype(int) - return y @@ -210,9 +192,61 @@ def test_imag_nocomplex(self, xp, dtype): imag = xp.imag(x) return imag + @pytest.mark.skip("'dpnp_array' object has no attribute 'base' yet") + @testing.for_complex_dtypes() + @testing.numpy_cupy_array_equal() + def test_real_ndarray_complex(self, xp, dtype): + x = testing.shaped_arange(self.shape, xp, dtype=dtype) + x_ = x.copy() + real = x_.real + # real returns a view + assert real.base is x_ + x_ += 1 + 1j + testing.assert_array_equal(real, x.real + 1) + return real + + @pytest.mark.skip("'dpnp_array' object has no attribute 'base' yet") + @testing.for_complex_dtypes() + @testing.numpy_cupy_array_equal() + def test_real_complex(self, xp, dtype): + x = testing.shaped_arange(self.shape, xp, dtype=dtype) + x_ = x.copy() + real = xp.real(x_) + # real returns a view + assert real.base is x_ + x_ += 1 + 1j + testing.assert_array_equal(real, x.real + 1) + return real + + @pytest.mark.skip("'dpnp_array' object has no attribute 'base' yet") + @testing.for_complex_dtypes() + @testing.numpy_cupy_array_equal() + def test_imag_ndarray_complex(self, xp, dtype): + x = testing.shaped_arange(self.shape, xp, dtype=dtype) + x_ = x.copy() + imag = x_.imag + # imag returns a view + assert imag.base is x_ + x_ += 1 + 1j + testing.assert_array_equal(imag, x.imag + 1) + return imag + + @pytest.mark.skip("'dpnp_array' object has no attribute 'base' yet") + @testing.for_complex_dtypes() + @testing.numpy_cupy_array_equal() + def test_imag_complex(self, xp, dtype): + x = testing.shaped_arange(self.shape, xp, dtype=dtype) + x_ = x.copy() + imag = xp.imag(x_) + # imag returns a view + assert imag.base is x_ + x_ += 1 + 1j + testing.assert_array_equal(imag, x.imag + 1) + return imag + class ArithmeticBinaryBase: - @testing.numpy_cupy_allclose(atol=1e-4, type_check=False) + @testing.numpy_cupy_allclose(rtol=1e-4, type_check=has_support_aspect64()) def check_binary(self, xp): arg1 = self.arg1 arg2 = self.arg2 @@ -221,15 +255,37 @@ def check_binary(self, xp): dtype1 = np1.dtype dtype2 = np2.dtype - # TODO(niboshi): Fix this: xp.add(0j, xp.array([2.], 'f')).dtype - # numpy => complex64 - # # cupy => complex128 - # if isinstance(arg1, complex): - # if dtype2 in (numpy.float16, numpy.float32): - # return xp.array(True) - - arg1 = xp.asarray(arg1) - arg2 = xp.asarray(arg2) + if xp.isscalar(arg1) and xp.isscalar(arg2): + pytest.skip("both scalar inputs is not supported") + + if self.name == "power": + # TODO(niboshi): Fix this: power(0, 1j) + # numpy => 1+0j + # cupy => 0j + if dtype2 in complex_types and (np1 == 0).any(): + return xp.array(True) + # TODO: Fix this: power(0j, 0) + # numpy => 1+0j + # cupy => nan+nanj + elif dtype1 in complex_types and (np2 == 0).any(): + return xp.array(True) + + if self.name in ("true_divide", "floor_divide", "fmod", "remainder"): + if dtype1.kind in "u" and xp.isscalar(arg2) and arg2 < 0: + # TODO: Fix 
this: array(3, dtype=uint) / -2 + # numpy => -1.5 + # cupy => 0.01181102 + pytest.skip("TODO: add gh issue") + if dtype2.kind in "u" and xp.isscalar(arg1) and arg1 < 0: + # TODO: Fix this: 2 / array(3, dtype=uint) + # numpy => -0.666667 + # cupy => 84.666667 + pytest.skip("TODO: add gh issue") + + if isinstance(arg1, numpy.ndarray): + arg1 = xp.asarray(arg1) + if isinstance(arg2, numpy.ndarray): + arg2 = xp.asarray(arg2) # Subtraction between booleans is not allowed. if ( @@ -255,15 +311,6 @@ def check_binary(self, xp): if dtype1 in (numpy.float16, numpy.float32): y = y.astype(numpy.complex64) - # NumPy returns an output array of another type than DPNP when input ones have different types. - if xp is numpy and dtype1 != dtype2: - is_array_arg1 = not xp.isscalar(arg1) - is_array_arg2 = not xp.isscalar(arg2) - - is_int_float = lambda _x, _y: numpy.issubdtype( - _x, numpy.integer - ) and numpy.issubdtype(_y, numpy.floating) - return y @@ -271,16 +318,17 @@ def check_binary(self, xp): *( testing.product( { + # TODO(unno): boolean subtract causes DeprecationWarning in numpy>=1.13 "arg1": [ testing.shaped_arange((2, 3), numpy, dtype=d) for d in all_types ] - + [0, 0.0, 2, 2.0], + + [0, 0.0, 0j, 2, 2.0, 2j, True, False], "arg2": [ testing.shaped_reverse_arange((2, 3), numpy, dtype=d) for d in all_types ] - + [0, 0.0, 2, 2.0], + + [0, 0.0, 0j, 2, 2.0, 2j, True, False], "name": ["add", "multiply", "power", "subtract"], } ) @@ -290,19 +338,18 @@ def check_binary(self, xp): numpy.array([-3, -2, -1, 1, 2, 3], dtype=d) for d in negative_types ] - + [0, 0.0, 2, 2.0, -2, -2.0], + + [0, 0.0, 0j, 2, 2.0, 2j, -2, -2.0, -2j, True, False], "arg2": [ numpy.array([-3, -2, -1, 1, 2, 3], dtype=d) for d in negative_types ] - + [0, 0.0, 2, 2.0, -2, -2.0], + + [0, 0.0, 0j, 2, 2.0, 2j, -2, -2.0, -2j, True, False], "name": ["divide", "true_divide", "subtract"], } ) ) ) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") -class TestArithmeticBinary(ArithmeticBinaryBase, unittest.TestCase): +class TestArithmeticBinary(ArithmeticBinaryBase): def test_binary(self): self.use_dtype = False self.check_binary() @@ -311,19 +358,36 @@ def test_binary(self): @testing.parameterize( *( testing.product( + { + "arg1": [ + numpy.array([3, 2, 1, 1, 2, 3], dtype=d) + for d in unsigned_int_types + ] + + [0, 0.0, 2, 2.0, -2, -2.0, True, False], + "arg2": [ + numpy.array([3, 2, 1, 1, 2, 3], dtype=d) + for d in unsigned_int_types + ] + + [0, 0.0, 2, 2.0, -2, -2.0, True, False], + "name": ["true_divide"], + "dtype": [cupy.default_float_type()], + "use_dtype": [True, False], + } + ) + + testing.product( { "arg1": [ numpy.array([-3, -2, -1, 1, 2, 3], dtype=d) - for d in int_types + for d in signed_int_types ] - + [0, 0.0, 2, 2.0, -2, -2.0], + + [0, 0.0, 2, 2.0, -2, -2.0, True, False], "arg2": [ numpy.array([-3, -2, -1, 1, 2, 3], dtype=d) - for d in int_types + for d in signed_int_types ] - + [0, 0.0, 2, 2.0, -2, -2.0], + + [0, 0.0, 2, 2.0, -2, -2.0, True, False], "name": ["true_divide"], - "dtype": float_types, + "dtype": [cupy.default_float_type()], "use_dtype": [True, False], } ) @@ -340,7 +404,7 @@ def test_binary(self): ] + [0.0, 2.0, -2.0], "name": ["power", "true_divide", "subtract"], - "dtype": float_types, + "dtype": [cupy.default_float_type()], "use_dtype": [True, False], } ) @@ -350,14 +414,14 @@ def test_binary(self): testing.shaped_arange((2, 3), numpy, dtype=d) for d in no_complex_types ] - + [0, 0.0, 2, 2.0, -2, -2.0], + + [0, 0.0, 2, 2.0, -2, -2.0, True, False], "arg2": [ testing.shaped_reverse_arange((2, 3), numpy, dtype=d) 
for d in no_complex_types ] - + [0, 0.0, 2, 2.0, -2, -2.0], - "name": ["floor_divide", "fmod", "remainder", "mod"], - "dtype": float_types, + + [0, 0.0, 2, 2.0, -2, -2.0, True, False], + "name": ["floor_divide", "fmod", "remainder"], + "dtype": [cupy.default_float_type()], "use_dtype": [True, False], } ) @@ -367,31 +431,229 @@ def test_binary(self): numpy.array([-3, -2, -1, 1, 2, 3], dtype=d) for d in negative_no_complex_types ] - + [0, 0.0, 2, 2.0, -2, -2.0], + + [0, 0.0, 2, 2.0, -2, -2.0, True, False], "arg2": [ numpy.array([-3, -2, -1, 1, 2, 3], dtype=d) for d in negative_no_complex_types ] - + [0, 0.0, 2, 2.0, -2, -2.0], - "name": ["floor_divide", "fmod", "remainder", "mod"], - "dtype": float_types, + + [0, 0.0, 2, 2.0, -2, -2.0, True, False], + "name": ["floor_divide", "fmod", "remainder"], + "dtype": [cupy.default_float_type()], "use_dtype": [True, False], } ) ) ) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") -class TestArithmeticBinary2(ArithmeticBinaryBase, unittest.TestCase): +class TestArithmeticBinary2(ArithmeticBinaryBase): def test_binary(self): - if ( - self.use_dtype - and numpy.lib.NumpyVersion(numpy.__version__) < "1.10.0" - ): - raise unittest.SkipTest("NumPy>=1.10") self.check_binary() -class TestArithmeticModf(unittest.TestCase): +@pytest.mark.skip("'casting' keyword is not supported yet") +class UfuncTestBase: + @testing.numpy_cupy_allclose(accept_error=TypeError) + def check_casting_out(self, in0_type, in1_type, out_type, casting, xp): + a = testing.shaped_arange((2, 3), xp, in0_type) + b = testing.shaped_arange((2, 3), xp, in1_type) + c = xp.zeros((2, 3), out_type) + if casting != "unsafe": + # may raise TypeError + return xp.add(a, b, out=c, casting=casting) + + with warnings.catch_warnings(record=True) as ws: + warnings.simplefilter("always") + ret = xp.add(a, b, out=c, casting=casting) + ws = [w.category for w in ws] + assert all([w == numpy.ComplexWarning for w in ws]), str(ws) + return ret, xp.array(len(ws)) + + @testing.numpy_cupy_allclose(accept_error=TypeError) + def check_casting_dtype(self, in0_type, in1_type, dtype, casting, xp): + a = testing.shaped_arange((2, 3), xp, in0_type) + b = testing.shaped_arange((2, 3), xp, in1_type) + if casting != "unsafe": + # may raise TypeError + return xp.add(a, b, dtype=dtype, casting=casting) + + with warnings.catch_warnings(record=True) as ws: + warnings.simplefilter("always") + ret = xp.add(a, b, dtype=dtype, casting="unsafe") + ws = [w.category for w in ws] + assert all([w == numpy.ComplexWarning for w in ws]), str(ws) + return ret, xp.array(len(ws)) + + # delete this, once check_casting_dtype passes + @testing.numpy_cupy_allclose() + def check_casting_dtype_unsafe_ignore_warnings( + self, in0_type, in1_type, dtype, xp + ): + a = testing.shaped_arange((2, 3), xp, in0_type) + b = testing.shaped_arange((2, 3), xp, in1_type) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + return xp.add(a, b, dtype=dtype, casting="unsafe") + + +class TestUfunc(UfuncTestBase): + @pytest.mark.parametrize( + "casting", + [ + "no", + "equiv", + "safe", + "same_kind", + "unsafe", + ], + ) + @testing.for_all_dtypes_combination(names=["in_type", "out_type"]) + def test_casting_out_only(self, in_type, out_type, casting): + self.check_casting_out(in_type, in_type, out_type, casting) + + @pytest.mark.parametrize( + "casting", + [ + pytest.param("no", marks=pytest.mark.skip("flaky xfail")), + pytest.param("equiv", marks=pytest.mark.skip("flaky xfail")), + "safe", + "same_kind", + "unsafe", + ], + ) + 
@testing.for_all_dtypes_combination( + names=["in0_type", "in1_type", "out_type"], full=False + ) + def test_casting_in_out(self, in0_type, in1_type, out_type, casting): + self.check_casting_out(in0_type, in1_type, out_type, casting) + + @pytest.mark.xfail() + @pytest.mark.parametrize( + "casting", + [ + "no", + "equiv", + ], + ) + @pytest.mark.parametrize( + ("in0_type", "in1_type", "out_type"), + [ + (numpy.int16, numpy.int32, numpy.int32), + ], + ) + def test_casting_in_xfail1(self, in0_type, in1_type, out_type, casting): + self.check_casting_out(in0_type, in1_type, out_type, casting) + + @pytest.mark.skip("flaky xfail") + @pytest.mark.parametrize( + "casting", + [ + "no", + "equiv", + "safe", + "same_kind", + "unsafe", + ], + ) + @testing.for_all_dtypes_combination( + names=["in0_type", "in1_type", "dtype"], full=False + ) + def test_casting_dtype(self, in0_type, in1_type, dtype, casting): + self.check_casting_dtype(in0_type, in1_type, dtype, casting) + + @pytest.mark.xfail() + @pytest.mark.parametrize( + "casting", + [ + "no", + "equiv", + ], + ) + @pytest.mark.parametrize( + ("in0_type", "in1_type", "dtype"), + [ + (numpy.int16, numpy.int32, numpy.int32), + ], + ) + def test_casting_dtype_xfail1(self, in0_type, in1_type, dtype, casting): + self.check_casting_dtype(in0_type, in1_type, dtype, casting) + + @pytest.mark.xfail() + @pytest.mark.parametrize( + "casting", + [ + "no", + "equiv", + "safe", + "same_kind", + ], + ) + @pytest.mark.parametrize( + ("in0_type", "in1_type", "dtype"), + [ + (numpy.int32, numpy.int32, numpy.bool_), + (numpy.float64, numpy.float64, numpy.int32), + ], + ) + def test_casting_dtype_xfail2(self, in0_type, in1_type, dtype, casting): + self.check_casting_dtype(in0_type, in1_type, dtype, casting) + + @testing.for_all_dtypes_combination( + names=["in0_type", "in1_type", "dtype"], full=False + ) + def test_casting_dtype_unsafe_ignore_warnings( + self, in0_type, in1_type, dtype + ): + self.check_casting_dtype_unsafe_ignore_warnings( + in0_type, in1_type, dtype + ) + + +@testing.slow +class TestUfuncSlow(UfuncTestBase): + @pytest.mark.parametrize( + "casting", + [ + pytest.param("no", marks=pytest.mark.xfail()), + pytest.param("equiv", marks=pytest.mark.xfail()), + "safe", + "same_kind", + "unsafe", + ], + ) + @testing.for_all_dtypes_combination( + names=["in0_type", "in1_type", "out_type"], full=True + ) + def test_casting_out(self, in0_type, in1_type, out_type, casting): + self.check_casting_out(in0_type, in1_type, out_type, casting) + + @pytest.mark.xfail() + @pytest.mark.parametrize( + "casting", + [ + "no", + "equiv", + "safe", + "same_kind", + "unsafe", + ], + ) + @testing.for_all_dtypes_combination( + names=["in0_type", "in1_type", "dtype"], full=True + ) + def test_casting_dtype(self, in0_type, in1_type, dtype, casting): + self.check_casting_dtype(in0_type, in1_type, dtype, casting) + + @testing.for_all_dtypes_combination( + names=["in0_type", "in1_type", "dtype"], full=True + ) + def test_casting_dtype_unsafe_ignore_warnings( + self, in0_type, in1_type, dtype + ): + self.check_casting_dtype_unsafe_ignore_warnings( + in0_type, in1_type, dtype + ) + + +class TestArithmeticModf: @testing.for_float_dtypes() @testing.numpy_cupy_allclose() def test_modf(self, xp, dtype): @@ -406,11 +668,9 @@ def test_modf(self, xp, dtype): @testing.parameterize( *testing.product({"xp": [numpy, cupy], "shape": [(3, 2), (), (3, 0, 2)]}) ) -class TestBoolSubtract(unittest.TestCase): +class TestBoolSubtract: def test_bool_subtract(self): xp = self.xp - if xp is numpy and not 
testing.numpy_satisfies(">=1.14.0"): - raise unittest.SkipTest("NumPy<1.14.0") shape = self.shape x = testing.shaped_random(shape, xp, dtype=numpy.bool_) y = testing.shaped_random(shape, xp, dtype=numpy.bool_) From 3f1b52c18b8e4ad3a449e58964e0664829e2f6c9 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 14 Jun 2024 20:06:22 +0200 Subject: [PATCH 32/35] Add a link to gh issue in arithmetic tests --- tests/third_party/cupy/math_tests/test_arithmetic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/third_party/cupy/math_tests/test_arithmetic.py b/tests/third_party/cupy/math_tests/test_arithmetic.py index 8037c4f4002c..dbc8b12097bd 100644 --- a/tests/third_party/cupy/math_tests/test_arithmetic.py +++ b/tests/third_party/cupy/math_tests/test_arithmetic.py @@ -275,12 +275,12 @@ def check_binary(self, xp): # TODO: Fix this: array(3, dtype=uint) / -2 # numpy => -1.5 # cupy => 0.01181102 - pytest.skip("TODO: add gh issue") + pytest.skip("due to dpctl gh-1711") if dtype2.kind in "u" and xp.isscalar(arg1) and arg1 < 0: # TODO: Fix this: 2 / array(3, dtype=uint) # numpy => -0.666667 # cupy => 84.666667 - pytest.skip("TODO: add gh issue") + pytest.skip("due to dpctl gh-1711") if isinstance(arg1, numpy.ndarray): arg1 = xp.asarray(arg1) From aa3fbf2984d07ba6ab5bef3bf977838e8a613244 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Mon, 17 Jun 2024 11:04:47 +0200 Subject: [PATCH 33/35] Suppress divide warning --- tests/test_mathematical.py | 1 + .../cupy/math_tests/test_arithmetic.py | 18 ++++++++++-------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py index 325816f9a008..ae2c73748b56 100644 --- a/tests/test_mathematical.py +++ b/tests/test_mathematical.py @@ -1316,6 +1316,7 @@ def test_float_remainder_magnitude(dtype): assert_equal(result, expected) +@pytest.mark.usefixtures("suppress_divide_numpy_warnings") @pytest.mark.usefixtures("suppress_invalid_numpy_warnings") @pytest.mark.parametrize("func", ["remainder", "fmod"]) @pytest.mark.parametrize("dtype", get_float_dtypes(no_float16=False)) diff --git a/tests/third_party/cupy/math_tests/test_arithmetic.py b/tests/third_party/cupy/math_tests/test_arithmetic.py index dbc8b12097bd..7a7d10143887 100644 --- a/tests/third_party/cupy/math_tests/test_arithmetic.py +++ b/tests/third_party/cupy/math_tests/test_arithmetic.py @@ -127,14 +127,16 @@ def test_unary(self, xp): if self.name in ("reciprocal") and xp is numpy: # In NumPy, for integer arguments with absolute value larger than 1 the result is always zero. # We need to convert the input data type to float then compare the output with DPNP. 
- if arg1.dtype.char in "bB": # int8 - arg1 = xp.asarray(arg1, dtype=numpy.float16) - elif arg1.dtype.char in "hH": # int16 - arg1 = xp.asarray(arg1, dtype=numpy.float32) - elif arg1.dtype.char in "iIlL": # int32, int64 - np_dtype = ( - numpy.float64 if has_support_aspect64() else numpy.float32 - ) + if numpy.issubdtype(arg1.dtype, numpy.integer): + if arg1.dtype.char in "bB": # int8 + np_dtype = numpy.float16 + elif arg1.dtype.char in "hH": # int16 + np_dtype = numpy.float32 + else: # int32, int64 + if has_support_aspect64(): + np_dtype = numpy.float64 + else: + np_dtype = numpy.float32 arg1 = xp.asarray(arg1, dtype=np_dtype) if self.name in {"angle"}: From 2733489dfa4dd97031b7c677bf2df84d94cf3ea9 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Mon, 17 Jun 2024 11:41:26 +0200 Subject: [PATCH 34/35] Resolve compilation warning --- .../backend/extensions/ufunc/elementwise_functions/populate.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/populate.hpp b/dpnp/backend/extensions/ufunc/elementwise_functions/populate.hpp index 9470a41b2ce3..0b3cc8dac152 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/populate.hpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/populate.hpp @@ -180,7 +180,7 @@ template \ sycl::event __name__##_strided_impl( \ sycl::queue &exec_q, size_t nelems, int nd, \ - const ssize_t *shape_and_strides, const char *arg1_p, \ + const py::ssize_t *shape_and_strides, const char *arg1_p, \ py::ssize_t arg1_offset, const char *arg2_p, py::ssize_t arg2_offset, \ char *res_p, py::ssize_t res_offset, \ const std::vector &depends, \ From 62d8e0ab206af91056549c5ad71e48f515dfba91 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 19 Jun 2024 15:05:54 +0200 Subject: [PATCH 35/35] Updated docstring description of inputs per review comment --- dpnp/dpnp_iface_bitwise.py | 43 +++++++++----- dpnp/dpnp_iface_logic.py | 66 ++++++++++++++++------ dpnp/dpnp_iface_mathematical.py | 97 ++++++++++++++++++++++++-------- dpnp/dpnp_iface_trigonometric.py | 49 +++++++++++++--- 4 files changed, 190 insertions(+), 65 deletions(-) diff --git a/dpnp/dpnp_iface_bitwise.py b/dpnp/dpnp_iface_bitwise.py index 21ee7cc3d827..6a9c44b813e8 100644 --- a/dpnp/dpnp_iface_bitwise.py +++ b/dpnp/dpnp_iface_bitwise.py @@ -65,14 +65,16 @@ Parameters ---------- -x1 : {dpnp.ndarray, usm_ndarray} +x1 : {dpnp.ndarray, usm_ndarray, scalar} First input array, expected to have integer or boolean data type. -x2 : {dpnp.ndarray, usm_ndarray} + Both inputs `x1` and `x2` can not be scalars at the same time. +x2 : {dpnp.ndarray, usm_ndarray, scalar} Second input array, also expected to have integer or boolean data - type. + type. Both inputs `x1` and `x2` can not be scalars at the same time. out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -132,14 +134,16 @@ Parameters ---------- -x1 : {dpnp.ndarray, usm_ndarray} +x1 : {dpnp.ndarray, usm_ndarray, scalar} First input array, expected to have integer or boolean data type. -x2 : {dpnp.ndarray, usm_ndarray} + Both inputs `x1` and `x2` can not be scalars at the same time. +x2 : {dpnp.ndarray, usm_ndarray, scalar} Second input array, also expected to have integer or boolean data - type. + type. 
Both inputs `x1` and `x2` can not be scalars at the same time. out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -194,14 +198,16 @@ Parameters ---------- -x1 : {dpnp.ndarray, usm_ndarray} +x1 : {dpnp.ndarray, usm_ndarray, scalar} First input array, expected to have integer or boolean data type. -x2 : {dpnp.ndarray, usm_ndarray} + Both inputs `x1` and `x2` can not be scalars at the same time. +x2 : {dpnp.ndarray, usm_ndarray, scalar} Second input array, also expected to have integer or boolean data - type. + type. Both inputs `x1` and `x2` can not be scalars at the same time. out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -264,6 +270,7 @@ out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -326,14 +333,17 @@ Parameters ---------- -x1 : {dpnp.ndarray, usm_ndarray} +x1 : {dpnp.ndarray, usm_ndarray, scalar} First input array, expected to have integer data type. -x2 : {dpnp.ndarray, usm_ndarray} + Both inputs `x1` and `x2` can not be scalars at the same time. +x2 : {dpnp.ndarray, usm_ndarray, scalar} Second input array, also expected to have integer data type. - Each element must be greater than or equal to 0. + Each element must be greater than or equal to ``0``. + Both inputs `x1` and `x2` can not be scalars at the same time. out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -384,14 +394,17 @@ Parameters ---------- -x1 : {dpnp.ndarray, usm_ndarray} +x1 : {dpnp.ndarray, usm_ndarray, scalar} First input array, expected to have integer data type. -x2 : {dpnp.ndarray, usm_ndarray} + Both inputs `x1` and `x2` can not be scalars at the same time. +x2 : {dpnp.ndarray, usm_ndarray, scalar} Second input array, also expected to have integer data type. - Each element must be greater than or equal to 0. + Each element must be greater than or equal to ``0``. + Both inputs `x1` and `x2` can not be scalars at the same time. out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. diff --git a/dpnp/dpnp_iface_logic.py b/dpnp/dpnp_iface_logic.py index d780cf578bf8..7809a81771ea 100644 --- a/dpnp/dpnp_iface_logic.py +++ b/dpnp/dpnp_iface_logic.py @@ -313,10 +313,12 @@ def any(x, /, axis=None, out=None, keepdims=False, *, where=True): Parameters ---------- -x1 : {dpnp.ndarray, usm_ndarray} +x1 : {dpnp.ndarray, usm_ndarray, scalar} First input array, expected to have numeric data type. 
-x2 : {dpnp.ndarray, usm_ndarray} + Both inputs `x1` and `x2` can not be scalars at the same time. +x2 : {dpnp.ndarray, usm_ndarray, scalar} Second input array, also expected to have numeric data type. + Both inputs `x1` and `x2` can not be scalars at the same time. out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array have the correct shape and the expected data type. @@ -382,13 +384,16 @@ def any(x, /, axis=None, out=None, keepdims=False, *, where=True): Parameters ---------- -x1 : {dpnp.ndarray, usm_ndarray} +x1 : {dpnp.ndarray, usm_ndarray, scalar} First input array, expected to have numeric data type. -x2 : {dpnp.ndarray, usm_ndarray} + Both inputs `x1` and `x2` can not be scalars at the same time. +x2 : {dpnp.ndarray, usm_ndarray, scalar} Second input array, also expected to have numeric data type. + Both inputs `x1` and `x2` can not be scalars at the same time. out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -445,13 +450,16 @@ def any(x, /, axis=None, out=None, keepdims=False, *, where=True): Parameters ---------- -x1 : {dpnp.ndarray, usm_ndarray} +x1 : {dpnp.ndarray, usm_ndarray, scalar} First input array, expected to have numeric data type. -x2 : {dpnp.ndarray, usm_ndarray} + Both inputs `x1` and `x2` can not be scalars at the same time. +x2 : {dpnp.ndarray, usm_ndarray, scalar} Second input array, also expected to have numeric data type. + Both inputs `x1` and `x2` can not be scalars at the same time. out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -556,6 +564,7 @@ def isclose(x1, x2, rtol=1e-05, atol=1e-08, equal_nan=False): out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -615,6 +624,7 @@ def isclose(x1, x2, rtol=1e-05, atol=1e-08, equal_nan=False): out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -668,6 +678,7 @@ def isclose(x1, x2, rtol=1e-05, atol=1e-08, equal_nan=False): out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -718,13 +729,16 @@ def isclose(x1, x2, rtol=1e-05, atol=1e-08, equal_nan=False): Parameters ---------- -x1 : {dpnp.ndarray, usm_ndarray} +x1 : {dpnp.ndarray, usm_ndarray, scalar} First input array, expected to have numeric data type. -x2 : {dpnp.ndarray, usm_ndarray} + Both inputs `x1` and `x2` can not be scalars at the same time. +x2 : {dpnp.ndarray, usm_ndarray, scalar} Second input array, also expected to have numeric data type. 
+ Both inputs `x1` and `x2` can not be scalars at the same time. out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -781,13 +795,16 @@ def isclose(x1, x2, rtol=1e-05, atol=1e-08, equal_nan=False): Parameters ---------- -x1 : {dpnp.ndarray, usm_ndarray} +x1 : {dpnp.ndarray, usm_ndarray, scalar} First input array, expected to have numeric data type. -x2 : {dpnp.ndarray, usm_ndarray} + Both inputs `x1` and `x2` can not be scalars at the same time. +x2 : {dpnp.ndarray, usm_ndarray, scalar} Second input array, also expected to have numeric data type. + Both inputs `x1` and `x2` can not be scalars at the same time. out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -844,13 +861,16 @@ def isclose(x1, x2, rtol=1e-05, atol=1e-08, equal_nan=False): Parameters ---------- -x1 : {dpnp.ndarray, usm_ndarray} +x1 : {dpnp.ndarray, usm_ndarray, scalar} First input array. -x2 : {dpnp.ndarray, usm_ndarray} + Both inputs `x1` and `x2` can not be scalars at the same time. +x2 : {dpnp.ndarray, usm_ndarray, scalar} Second input array. + Both inputs `x1` and `x2` can not be scalars at the same time. out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -913,6 +933,7 @@ def isclose(x1, x2, rtol=1e-05, atol=1e-08, equal_nan=False): out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -961,13 +982,16 @@ def isclose(x1, x2, rtol=1e-05, atol=1e-08, equal_nan=False): Parameters ---------- -x1 : {dpnp.ndarray, usm_ndarray} +x1 : {dpnp.ndarray, usm_ndarray, scalar} First input array. -x2 : {dpnp.ndarray, usm_ndarray} + Both inputs `x1` and `x2` can not be scalars at the same time. +x2 : {dpnp.ndarray, usm_ndarray, scalar} Second input array. + Both inputs `x1` and `x2` can not be scalars at the same time. out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -1026,13 +1050,16 @@ def isclose(x1, x2, rtol=1e-05, atol=1e-08, equal_nan=False): Parameters ---------- -x1 : {dpnp.ndarray, usm_ndarray} +x1 : {dpnp.ndarray, usm_ndarray, scalar} First input array. -x2 : {dpnp.ndarray, usm_ndarray} + Both inputs `x1` and `x2` can not be scalars at the same time. +x2 : {dpnp.ndarray, usm_ndarray, scalar} Second input array. + Both inputs `x1` and `x2` can not be scalars at the same time. out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. 
order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -1089,13 +1116,16 @@ def isclose(x1, x2, rtol=1e-05, atol=1e-08, equal_nan=False): Parameters ---------- -x1 : {dpnp.ndarray, usm_ndarray} +x1 : {dpnp.ndarray, usm_ndarray, scalar} First input array, expected to have numeric data type. -x2 : {dpnp.ndarray, usm_ndarray} + Both inputs `x1` and `x2` can not be scalars at the same time. +x2 : {dpnp.ndarray, usm_ndarray, scalar} Second input array, also expected to have numeric data type. + Both inputs `x1` and `x2` can not be scalars at the same time. out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index 948243b0035c..1fe7839f5967 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -342,6 +342,7 @@ def _gradient_num_diff_edges( out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -403,13 +404,16 @@ def _gradient_num_diff_edges( Parameters ---------- -x1 : {dpnp.ndarray, usm_ndarray} +x1 : {dpnp.ndarray, usm_ndarray, scalar} First input array, expected to have numeric data type. -x2 : {dpnp.ndarray, usm_ndarray} + Both inputs `x1` and `x2` can not be scalars at the same time. +x2 : {dpnp.ndarray, usm_ndarray, scalar} Second input array, also expected to have numeric data type. + Both inputs `x1` and `x2` can not be scalars at the same time. out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -479,6 +483,7 @@ def _gradient_num_diff_edges( out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -536,6 +541,7 @@ def around(x, /, decimals=0, out=None): out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. Returns ------- @@ -572,6 +578,7 @@ def around(x, /, decimals=0, out=None): out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -698,6 +705,7 @@ def clip(a, a_min, a_max, *, out=None, order="K", **kwargs): out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. 
@@ -761,14 +769,17 @@ def convolve(a, v, mode="full"): Parameters ---------- -x1 : {dpnp.ndarray, usm_ndarray} +x1 : {dpnp.ndarray, usm_ndarray, scalar} First input array, expected to have a real floating-point data type. -x2 : {dpnp.ndarray, usm_ndarray} + Both inputs `x1` and `x2` can not be scalars at the same time. +x2 : {dpnp.ndarray, usm_ndarray, scalar} Second input array, also expected to have a real floating-point data type. + Both inputs `x1` and `x2` can not be scalars at the same time. out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -1235,13 +1246,16 @@ def diff(a, n=1, axis=-1, prepend=None, append=None): Parameters ---------- -x1 : {dpnp.ndarray, usm_ndarray} +x1 : {dpnp.ndarray, usm_ndarray, scalar} First input array, expected to have numeric data type. -x2 : {dpnp.ndarray, usm_ndarray} + Both inputs `x1` and `x2` can not be scalars at the same time. +x2 : {dpnp.ndarray, usm_ndarray, scalar} Second input array, also expected to have numeric data type. + Both inputs `x1` and `x2` can not be scalars at the same time. out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -1362,6 +1376,7 @@ def ediff1d(x1, to_end=None, to_begin=None): out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -1410,6 +1425,7 @@ def ediff1d(x1, to_end=None, to_begin=None): out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -1462,13 +1478,16 @@ def ediff1d(x1, to_end=None, to_begin=None): Parameters ---------- -x1 : {dpnp.ndarray, usm_ndarray} +x1 : {dpnp.ndarray, usm_ndarray, scalar} First input array, expected to have numeric data type. -x2 : {dpnp.ndarray, usm_ndarray} + Both inputs `x1` and `x2` can not be scalars at the same time. +x2 : {dpnp.ndarray, usm_ndarray, scalar} Second input array, also expected to have numeric data type. + Both inputs `x1` and `x2` can not be scalars at the same time. out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -1758,13 +1777,16 @@ def fmin(x1, x2, /, out=None, *, where=True, dtype=None, subok=True, **kwargs): Parameters ---------- -x1 : {dpnp.ndarray, usm_ndarray} +x1 : {dpnp.ndarray, usm_ndarray, scalar} First input array, expected to have a real-valued data type. -x2 : {dpnp.ndarray, usm_ndarray} + Both inputs `x1` and `x2` can not be scalars at the same time. +x2 : {dpnp.ndarray, usm_ndarray, scalar} Second input array, also expected to have a real-valued data type. 
+ Both inputs `x1` and `x2` can not be scalars at the same time. out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -2032,6 +2054,7 @@ def gradient(f, *varargs, axis=None, edge_order=1): out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -2082,13 +2105,16 @@ def gradient(f, *varargs, axis=None, edge_order=1): Parameters ---------- -x1 : {dpnp.ndarray, usm_ndarray} +x1 : {dpnp.ndarray, usm_ndarray, scalar} First input array, expected to have numeric data type. -x2 : {dpnp.ndarray, usm_ndarray} + Both inputs `x1` and `x2` can not be scalars at the same time. +x2 : {dpnp.ndarray, usm_ndarray, scalar} Second input array, also expected to have numeric data type. + Both inputs `x1` and `x2` can not be scalars at the same time. out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -2154,13 +2180,16 @@ def gradient(f, *varargs, axis=None, edge_order=1): Parameters ---------- -x1 : {dpnp.ndarray, usm_ndarray} +x1 : {dpnp.ndarray, usm_ndarray, scalar} First input array, expected to have numeric data type. -x2 : {dpnp.ndarray, usm_ndarray} + Both inputs `x1` and `x2` can not be scalars at the same time. +x2 : {dpnp.ndarray, usm_ndarray, scalar} Second input array, also expected to have numeric data type. + Both inputs `x1` and `x2` can not be scalars at the same time. out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -2256,13 +2285,16 @@ def modf(x1, **kwargs): Parameters ---------- -x1 : {dpnp.ndarray, usm_ndarray} +x1 : {dpnp.ndarray, usm_ndarray, scalar} First input array, expected to have numeric data type. -x2 : {dpnp.ndarray, usm_ndarray} + Both inputs `x1` and `x2` can not be scalars at the same time. +x2 : {dpnp.ndarray, usm_ndarray, scalar} Second input array, also expected to have numeric data type. + Both inputs `x1` and `x2` can not be scalars at the same time. out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -2329,6 +2361,7 @@ def modf(x1, **kwargs): out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -2384,6 +2417,7 @@ def modf(x1, **kwargs): out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. 
Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -2439,13 +2473,16 @@ def modf(x1, **kwargs): Parameters ---------- -x1 : {dpnp.ndarray, usm_ndarray} +x1 : {dpnp.ndarray, usm_ndarray, scalar} First input array, expected to have numeric data type. -x2 : {dpnp.ndarray, usm_ndarray} + Both inputs `x1` and `x2` can not be scalars at the same time. +x2 : {dpnp.ndarray, usm_ndarray, scalar} Second input array, also expected to have numeric data type. + Both inputs `x1` and `x2` can not be scalars at the same time. out : {None, dpnp.ndarray, usm_ndarray}, optional - Output array to populate. Array must have the correct - shape and the expected data type. + Output array to populate. Array must have the correct shape and + the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -2626,6 +2663,7 @@ def prod( out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -2702,13 +2740,16 @@ def prod( Parameters ---------- -x1 : {dpnp.ndarray, usm_ndarray} +x1 : {dpnp.ndarray, usm_ndarray, scalar} First input array, expected to have a real-valued data type. -x2 : {dpnp.ndarray, usm_ndarray} + Both inputs `x1` and `x2` can not be scalars at the same time. +x2 : {dpnp.ndarray, usm_ndarray, scalar} Second input array, also expected to have a real-valued data type. + Both inputs `x1` and `x2` can not be scalars at the same time. out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -2784,6 +2825,7 @@ def prod( out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -2843,6 +2885,7 @@ def prod( out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. Returns ------- @@ -2899,6 +2942,7 @@ def prod( out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -2953,6 +2997,7 @@ def prod( out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -2999,13 +3044,16 @@ def prod( Parameters ---------- -x1 : {dpnp.ndarray, usm_ndarray} +x1 : {dpnp.ndarray, usm_ndarray, scalar} First input array, expected to have numeric data type. 
-x2 : {dpnp.ndarray, usm_ndarray} + Both inputs `x1` and `x2` can not be scalars at the same time. +x2 : {dpnp.ndarray, usm_ndarray, scalar} Second input array, also expected to have numeric data type. + Both inputs `x1` and `x2` can not be scalars at the same time. out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -3289,6 +3337,7 @@ def trapz(y1, x1=None, dx=1.0, axis=-1): out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. diff --git a/dpnp/dpnp_iface_trigonometric.py b/dpnp/dpnp_iface_trigonometric.py index d38af96ea2cf..4d5703cfc6ce 100644 --- a/dpnp/dpnp_iface_trigonometric.py +++ b/dpnp/dpnp_iface_trigonometric.py @@ -121,6 +121,7 @@ def _get_accumulation_res_dt(a, dtype, _out): out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -175,6 +176,7 @@ def _get_accumulation_res_dt(a, dtype, _out): out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -229,6 +231,7 @@ def _get_accumulation_res_dt(a, dtype, _out): out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -282,7 +285,8 @@ def _get_accumulation_res_dt(a, dtype, _out): Input array, expected to have numeric data type. out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. - Array must have the correct shape and the expected data type.. + Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -336,7 +340,8 @@ def _get_accumulation_res_dt(a, dtype, _out): Input array, expected to have numeric data type. out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. - Array must have the correct shape and the expected data type.. + Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -390,15 +395,18 @@ def _get_accumulation_res_dt(a, dtype, _out): Parameters ---------- -x1 : {dpnp.ndarray, usm_ndarray} +x1 : {dpnp.ndarray, usm_ndarray, scalar} First input array, expected to have a real-valued floating-point data type. -x2 : {dpnp.ndarray, usm_ndarray} + Both inputs `x1` and `x2` can not be scalars at the same time. 
+x2 : {dpnp.ndarray, usm_ndarray, scalar} Second input array, also expected to have a real-valued floating-point data type. + Both inputs `x1` and `x2` can not be scalars at the same time. out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -466,6 +474,7 @@ def _get_accumulation_res_dt(a, dtype, _out): out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -520,6 +529,7 @@ def _get_accumulation_res_dt(a, dtype, _out): out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -571,6 +581,7 @@ def _get_accumulation_res_dt(a, dtype, _out): out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -624,6 +635,7 @@ def _get_accumulation_res_dt(a, dtype, _out): out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -820,6 +832,7 @@ def degrees(x1, **kwargs): out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -872,6 +885,7 @@ def degrees(x1, **kwargs): out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -927,6 +941,7 @@ def degrees(x1, **kwargs): out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -982,13 +997,16 @@ def degrees(x1, **kwargs): Parameters ---------- -x1 : {dpnp.ndarray, usm_ndarray} +x1 : {dpnp.ndarray, usm_ndarray, scalar} First input array, expected to have a real-valued data type. -x2 : {dpnp.ndarray, usm_ndarray} + Both inputs `x1` and `x2` can not be scalars at the same time. +x2 : {dpnp.ndarray, usm_ndarray, scalar} Second input array, also expected to have a real-valued data type. + Both inputs `x1` and `x2` can not be scalars at the same time. out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. 
order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -1049,6 +1067,7 @@ def degrees(x1, **kwargs): out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -1103,6 +1122,7 @@ def degrees(x1, **kwargs): out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -1162,6 +1182,7 @@ def degrees(x1, **kwargs): out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -1221,6 +1242,7 @@ def degrees(x1, **kwargs): out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -1278,15 +1300,18 @@ def degrees(x1, **kwargs): Parameters ---------- -x1 : {dpnp.ndarray, usm_ndarray} +x1 : {dpnp.ndarray, usm_ndarray, scalar} First input array, expected to have a real-valued floating-point data type. -x2 : {dpnp.ndarray, usm_ndarray} + Both inputs `x1` and `x2` can not be scalars at the same time. +x2 : {dpnp.ndarray, usm_ndarray, scalar} Second input array, also expected to have a real-valued floating-point data type. + Both inputs `x1` and `x2` can not be scalars at the same time. out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -1426,6 +1451,7 @@ def logsumexp(x, /, *, axis=None, dtype=None, keepdims=False, out=None): out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -1556,6 +1582,7 @@ def reduce_hypot(x, /, *, axis=None, dtype=None, keepdims=False, out=None): out : ({None, dpnp.ndarray, usm_ndarray}, optional): Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : ({'C', 'F', 'A', 'K'}, optional): Memory layout of the newly output array, if parameter `out` is `None`. Default: ``"K"`` @@ -1660,6 +1687,7 @@ def radians(x1, **kwargs): out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -1713,6 +1741,7 @@ def radians(x1, **kwargs): out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. 
Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -1765,6 +1794,7 @@ def radians(x1, **kwargs): out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -1820,6 +1850,7 @@ def radians(x1, **kwargs): out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -1874,6 +1905,7 @@ def radians(x1, **kwargs): out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``. @@ -1927,6 +1959,7 @@ def radians(x1, **kwargs): out : {None, dpnp.ndarray, usm_ndarray}, optional Output array to populate. Array must have the correct shape and the expected data type. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array, if parameter `out` is ``None``. Default: ``"K"``.
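For reference, the parameter conventions spelled out in the updated docstrings above (scalar operands, `out`, and `order`) behave as in the minimal sketch below. This is an illustrative usage example only, not part of the patch itself; it assumes a working dpnp build with a default SYCL device, and `dpnp.add` and `dpnp.left_shift` are used purely as representative binary elementwise functions.

    import dpnp as np

    a = np.array([1.0, 2.0, 3.0])

    # One of x1/x2 may be a Python scalar, but not both at the same time.
    np.add(a, 5.0)            # array + scalar is fine
    # np.add(2.0, 5.0)        # both inputs scalar -> raises an exception

    # `out` must already have the correct shape and the expected data type.
    res = np.empty_like(a)
    np.add(a, 5.0, out=res)

    # `order` only matters when `out` is None; the default "K" keeps the
    # memory layout of the inputs in the newly allocated output array.
    np.add(a, 5.0, order="C")

    # For the shift functions, every element of x2 must be >= 0.
    np.left_shift(np.array([1, 2, 4]), 1)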