From 0a11646550b7dac932b6955a9fe6c377beeb087a Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk
Date: Mon, 25 Sep 2023 12:22:16 -0500
Subject: [PATCH 1/9] Avoid unnecessary argument copies

This is in response to about 11K instances of COPY_INSTEAD_OF_MOVE
issues in Coverity report.
---
 .../include/kernels/constructors.hpp          |  24 +-
 .../tensor/libtensor/source/accumulators.cpp  |  15 +-
 .../tensor/libtensor/source/accumulators.hpp  |  10 +-
 .../source/boolean_advanced_indexing.cpp      |  28 +-
 .../source/boolean_advanced_indexing.hpp      |  30 +-
 .../libtensor/source/boolean_reductions.cpp   |  12 +-
 .../libtensor/source/boolean_reductions.hpp   |   4 +-
 .../source/copy_and_cast_usm_to_usm.cpp       |   4 +-
 .../source/copy_and_cast_usm_to_usm.hpp       |   4 +-
 .../libtensor/source/copy_for_reshape.cpp     |   4 +-
 .../libtensor/source/copy_for_reshape.hpp     |   4 +-
 .../tensor/libtensor/source/copy_for_roll.cpp |   8 +-
 .../tensor/libtensor/source/copy_for_roll.hpp |   8 +-
 .../copy_numpy_ndarray_into_usm_ndarray.cpp   |   4 +-
 .../copy_numpy_ndarray_into_usm_ndarray.hpp   |   4 +-
 .../source/device_support_queries.cpp         |  22 +-
 .../source/device_support_queries.hpp         |  10 +-
 .../source/elementwise_functions.cpp          | 694 ++++++++++--------
 .../source/elementwise_functions.hpp          |  20 +-
 dpctl/tensor/libtensor/source/eye_ctor.cpp    |   2 +-
 dpctl/tensor/libtensor/source/eye_ctor.hpp    |   2 +-
 dpctl/tensor/libtensor/source/full_ctor.cpp   |   4 +-
 dpctl/tensor/libtensor/source/full_ctor.hpp   |   4 +-
 .../source/integer_advanced_indexing.cpp      |  14 +-
 .../source/integer_advanced_indexing.hpp      |  12 +-
 .../libtensor/source/linear_sequences.cpp     |  12 +-
 .../libtensor/source/linear_sequences.hpp     |  12 +-
 dpctl/tensor/libtensor/source/repeat.cpp      |  16 +-
 dpctl/tensor/libtensor/source/repeat.hpp      |  16 +-
 .../libtensor/source/sum_reductions.cpp       |   8 +-
 dpctl/tensor/libtensor/source/triul_ctor.cpp  |   4 +-
 dpctl/tensor/libtensor/source/triul_ctor.hpp  |   4 +-
 dpctl/tensor/libtensor/source/where.cpp       |   8 +-
 dpctl/tensor/libtensor/source/where.hpp       |   8 +-
 34 files changed, 561 insertions(+), 474 deletions(-)

diff --git a/dpctl/tensor/libtensor/include/kernels/constructors.hpp b/dpctl/tensor/libtensor/include/kernels/constructors.hpp
index 49111cbb61..e9745b2635 100644
--- a/dpctl/tensor/libtensor/include/kernels/constructors.hpp
+++ b/dpctl/tensor/libtensor/include/kernels/constructors.hpp
@@ -57,12 +57,12 @@ using namespace dpctl::tensor::offset_utils;
  * @brief Cast pybind11 class managing Python object to specified type `T`.
* @defgroup CtorKernels */ -template T unbox_py_scalar(py::object o) +template T unbox_py_scalar(const py::object &o) { return py::cast(o); } -template <> inline sycl::half unbox_py_scalar(py::object o) +template <> inline sycl::half unbox_py_scalar(const py::object &o) { float tmp = py::cast(o); return static_cast(tmp); @@ -74,8 +74,8 @@ template <> inline sycl::half unbox_py_scalar(py::object o) typedef sycl::event (*lin_space_step_fn_ptr_t)( sycl::queue, size_t, // num_elements - py::object start, - py::object step, + const py::object &start, + const py::object &step, char *, // dst_data_ptr const std::vector &); @@ -164,8 +164,8 @@ sycl::event lin_space_step_impl(sycl::queue exec_q, template sycl::event lin_space_step_impl(sycl::queue exec_q, size_t nelems, - py::object start, - py::object step, + const py::object &start, + const py::object &step, char *array_data, const std::vector &depends) { @@ -204,8 +204,8 @@ template struct LinSpaceStepFactory typedef sycl::event (*lin_space_affine_fn_ptr_t)( sycl::queue, size_t, // num_elements - py::object start, - py::object end, + const py::object &start, + const py::object &end, bool include_endpoint, char *, // dst_data_ptr const std::vector &); @@ -335,8 +335,8 @@ sycl::event lin_space_affine_impl(sycl::queue exec_q, template sycl::event lin_space_affine_impl(sycl::queue exec_q, size_t nelems, - py::object start, - py::object end, + const py::object &start, + const py::object &end, bool include_endpoint, char *array_data, const std::vector &depends) @@ -372,7 +372,7 @@ template struct LinSpaceAffineFactory typedef sycl::event (*full_contig_fn_ptr_t)(sycl::queue, size_t, - py::object, + const py::object &, char *, const std::vector &); @@ -427,7 +427,7 @@ sycl::event full_contig_impl(sycl::queue q, template sycl::event full_contig_impl(sycl::queue exec_q, size_t nelems, - py::object py_value, + const py::object &py_value, char *dst_p, const std::vector &depends) { diff --git a/dpctl/tensor/libtensor/source/accumulators.cpp b/dpctl/tensor/libtensor/source/accumulators.cpp index 5ce863ad3f..e52283ef24 100644 --- a/dpctl/tensor/libtensor/source/accumulators.cpp +++ b/dpctl/tensor/libtensor/source/accumulators.cpp @@ -97,10 +97,10 @@ void populate_mask_positions_dispatch_vectors(void) return; } -size_t py_mask_positions(dpctl::tensor::usm_ndarray mask, - dpctl::tensor::usm_ndarray cumsum, +size_t py_mask_positions(const dpctl::tensor::usm_ndarray &mask, + const dpctl::tensor::usm_ndarray &cumsum, sycl::queue exec_q, - std::vector const &depends) + const std::vector &depends) { // cumsum is 1D if (cumsum.get_ndim() != 1) { @@ -155,7 +155,8 @@ size_t py_mask_positions(dpctl::tensor::usm_ndarray mask, ? 
mask_positions_contig_i32_dispatch_vector[mask_typeid] : mask_positions_contig_i64_dispatch_vector[mask_typeid]; - return fn(exec_q, mask_size, mask_data, cumsum_data, depends); + return fn(std::move(exec_q), mask_size, mask_data, cumsum_data, + depends); } const py::ssize_t *shape = mask.get_shape_raw(); @@ -233,8 +234,8 @@ void populate_cumsum_1d_dispatch_vectors(void) return; } -size_t py_cumsum_1d(dpctl::tensor::usm_ndarray src, - dpctl::tensor::usm_ndarray cumsum, +size_t py_cumsum_1d(const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &cumsum, sycl::queue exec_q, std::vector const &depends) { @@ -290,7 +291,7 @@ size_t py_cumsum_1d(dpctl::tensor::usm_ndarray src, "this cumsum requires integer type, got src_typeid=" + std::to_string(src_typeid)); } - return fn(exec_q, src_size, src_data, cumsum_data, depends); + return fn(std::move(exec_q), src_size, src_data, cumsum_data, depends); } const py::ssize_t *shape = src.get_shape_raw(); diff --git a/dpctl/tensor/libtensor/source/accumulators.hpp b/dpctl/tensor/libtensor/source/accumulators.hpp index f4e5ce9d84..e3cdb035bc 100644 --- a/dpctl/tensor/libtensor/source/accumulators.hpp +++ b/dpctl/tensor/libtensor/source/accumulators.hpp @@ -39,15 +39,15 @@ namespace py_internal extern void populate_mask_positions_dispatch_vectors(void); -extern size_t py_mask_positions(dpctl::tensor::usm_ndarray mask, - dpctl::tensor::usm_ndarray cumsum, +extern size_t py_mask_positions(const dpctl::tensor::usm_ndarray &mask, + const dpctl::tensor::usm_ndarray &cumsum, sycl::queue exec_q, - std::vector const &depends = {}); + const std::vector &depends = {}); extern void populate_cumsum_1d_dispatch_vectors(void); -extern size_t py_cumsum_1d(dpctl::tensor::usm_ndarray src, - dpctl::tensor::usm_ndarray cumsum, +extern size_t py_cumsum_1d(const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &cumsum, sycl::queue exec_q, std::vector const &depends = {}); diff --git a/dpctl/tensor/libtensor/source/boolean_advanced_indexing.cpp b/dpctl/tensor/libtensor/source/boolean_advanced_indexing.cpp index 43ab92b86d..1bbb4109b6 100644 --- a/dpctl/tensor/libtensor/source/boolean_advanced_indexing.cpp +++ b/dpctl/tensor/libtensor/source/boolean_advanced_indexing.cpp @@ -110,13 +110,13 @@ void populate_masked_extract_dispatch_vectors(void) } std::pair -py_extract(dpctl::tensor::usm_ndarray src, - dpctl::tensor::usm_ndarray cumsum, +py_extract(const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &cumsum, int axis_start, // axis_start <= mask_i < axis_end int axis_end, dpctl::tensor::usm_ndarray dst, sycl::queue exec_q, - std::vector const &depends) + const std::vector &depends) { int src_nd = src.get_ndim(); if ((axis_start < 0 || axis_end > src_nd || axis_start >= axis_end)) { @@ -444,13 +444,13 @@ void populate_masked_place_dispatch_vectors(void) * ((i > 0) ? 
cumsum[i-1] + 1 : 1) */ std::pair -py_place(dpctl::tensor::usm_ndarray dst, - dpctl::tensor::usm_ndarray cumsum, +py_place(const dpctl::tensor::usm_ndarray &dst, + const dpctl::tensor::usm_ndarray &cumsum, int axis_start, // axis_start <= mask_i < axis_end int axis_end, - dpctl::tensor::usm_ndarray rhs, + const dpctl::tensor::usm_ndarray &rhs, sycl::queue exec_q, - std::vector const &depends) + const std::vector &depends) { int dst_nd = dst.get_ndim(); if ((axis_start < 0 || axis_end > dst_nd || axis_start >= axis_end)) { @@ -712,14 +712,14 @@ py_place(dpctl::tensor::usm_ndarray dst, // Non-zero std::pair -py_nonzero(dpctl::tensor::usm_ndarray - cumsum, // int32/int64 input array, 1D, C-contiguous - dpctl::tensor::usm_ndarray - indexes, // int32/int64 2D output array, C-contiguous - std::vector - mask_shape, // shape of array from which cumsum was computed +py_nonzero(const dpctl::tensor::usm_ndarray + &cumsum, // int32/int64 input array, 1D, C-contiguous + const dpctl::tensor::usm_ndarray + &indexes, // int32/int64 2D output array, C-contiguous + const std::vector + &mask_shape, // shape of array from which cumsum was computed sycl::queue exec_q, - std::vector const &depends) + const std::vector &depends) { if (!dpctl::utils::queues_are_compatible(exec_q, {cumsum, indexes})) { throw py::value_error( diff --git a/dpctl/tensor/libtensor/source/boolean_advanced_indexing.hpp b/dpctl/tensor/libtensor/source/boolean_advanced_indexing.hpp index 5ce868894a..cc920477fb 100644 --- a/dpctl/tensor/libtensor/source/boolean_advanced_indexing.hpp +++ b/dpctl/tensor/libtensor/source/boolean_advanced_indexing.hpp @@ -39,34 +39,36 @@ namespace py_internal { extern std::pair -py_extract(dpctl::tensor::usm_ndarray src, - dpctl::tensor::usm_ndarray cumsum, +py_extract(const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &cumsum, int axis_start, // axis_start <= mask_i < axis_end int axis_end, dpctl::tensor::usm_ndarray dst, sycl::queue exec_q, - std::vector const &depends = {}); + const std::vector &depends = {}); extern void populate_masked_extract_dispatch_vectors(void); extern std::pair -py_place(dpctl::tensor::usm_ndarray dst, - dpctl::tensor::usm_ndarray cumsum, +py_place(const dpctl::tensor::usm_ndarray &dst, + const dpctl::tensor::usm_ndarray &cumsum, int axis_start, // axis_start <= mask_i < axis_end int axis_end, - dpctl::tensor::usm_ndarray rhs, + const dpctl::tensor::usm_ndarray &rhs, sycl::queue exec_q, - std::vector const &depends = {}); + const std::vector &depends = {}); extern void populate_masked_place_dispatch_vectors(void); -extern std::pair py_nonzero( - dpctl::tensor::usm_ndarray cumsum, // int32 input array, 1D, C-contiguous - dpctl::tensor::usm_ndarray indexes, // int32 2D output array, C-contiguous - std::vector - mask_shape, // shape of array from which cumsum was computed - sycl::queue exec_q, - std::vector const &depends = {}); +extern std::pair +py_nonzero(const dpctl::tensor::usm_ndarray + &cumsum, // int32 input array, 1D, C-contiguous + const dpctl::tensor::usm_ndarray + &indexes, // int32 2D output array, C-contiguous + const std::vector + &mask_shape, // shape of array from which cumsum was computed + sycl::queue exec_q, + const std::vector &depends = {}); } // namespace py_internal } // namespace tensor diff --git a/dpctl/tensor/libtensor/source/boolean_reductions.cpp b/dpctl/tensor/libtensor/source/boolean_reductions.cpp index db07d05c73..9840b1b912 100644 --- a/dpctl/tensor/libtensor/source/boolean_reductions.cpp +++ 
b/dpctl/tensor/libtensor/source/boolean_reductions.cpp @@ -148,11 +148,11 @@ void init_boolean_reduction_functions(py::module_ m) using impl::all_reduction_axis1_contig_dispatch_vector; using impl::all_reduction_strided_dispatch_vector; - auto all_pyapi = [&](arrayT src, int trailing_dims_to_reduce, - arrayT dst, sycl::queue exec_q, + auto all_pyapi = [&](const arrayT &src, int trailing_dims_to_reduce, + const arrayT &dst, sycl::queue exec_q, const event_vecT &depends = {}) { return py_boolean_reduction( - src, trailing_dims_to_reduce, dst, exec_q, depends, + src, trailing_dims_to_reduce, dst, std::move(exec_q), depends, all_reduction_axis1_contig_dispatch_vector, all_reduction_axis0_contig_dispatch_vector, all_reduction_strided_dispatch_vector); @@ -169,11 +169,11 @@ void init_boolean_reduction_functions(py::module_ m) using impl::any_reduction_axis1_contig_dispatch_vector; using impl::any_reduction_strided_dispatch_vector; - auto any_pyapi = [&](arrayT src, int trailing_dims_to_reduce, - arrayT dst, sycl::queue exec_q, + auto any_pyapi = [&](const arrayT &src, int trailing_dims_to_reduce, + const arrayT &dst, sycl::queue exec_q, const event_vecT &depends = {}) { return py_boolean_reduction( - src, trailing_dims_to_reduce, dst, exec_q, depends, + src, trailing_dims_to_reduce, dst, std::move(exec_q), depends, any_reduction_axis1_contig_dispatch_vector, any_reduction_axis0_contig_dispatch_vector, any_reduction_strided_dispatch_vector); diff --git a/dpctl/tensor/libtensor/source/boolean_reductions.hpp b/dpctl/tensor/libtensor/source/boolean_reductions.hpp index 591439a7c9..1099e21008 100644 --- a/dpctl/tensor/libtensor/source/boolean_reductions.hpp +++ b/dpctl/tensor/libtensor/source/boolean_reductions.hpp @@ -49,9 +49,9 @@ namespace td_ns = dpctl::tensor::type_dispatch; template std::pair -py_boolean_reduction(dpctl::tensor::usm_ndarray src, +py_boolean_reduction(const dpctl::tensor::usm_ndarray &src, int trailing_dims_to_reduce, - dpctl::tensor::usm_ndarray dst, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends, const contig_dispatchT &axis1_contig_dispatch_vector, diff --git a/dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.cpp b/dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.cpp index 383385e238..afd50e16bb 100644 --- a/dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.cpp +++ b/dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.cpp @@ -67,8 +67,8 @@ namespace py = pybind11; using dpctl::utils::keep_args_alive; std::pair -copy_usm_ndarray_into_usm_ndarray(dpctl::tensor::usm_ndarray src, - dpctl::tensor::usm_ndarray dst, +copy_usm_ndarray_into_usm_ndarray(const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends = {}) { diff --git a/dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.hpp b/dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.hpp index 109062516a..04e7075a68 100644 --- a/dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.hpp +++ b/dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.hpp @@ -38,8 +38,8 @@ namespace py_internal { extern std::pair -copy_usm_ndarray_into_usm_ndarray(dpctl::tensor::usm_ndarray src, - dpctl::tensor::usm_ndarray dst, +copy_usm_ndarray_into_usm_ndarray(const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends = {}); diff --git a/dpctl/tensor/libtensor/source/copy_for_reshape.cpp b/dpctl/tensor/libtensor/source/copy_for_reshape.cpp index 
7114d87c47..4ea0706f87 100644 --- a/dpctl/tensor/libtensor/source/copy_for_reshape.cpp +++ b/dpctl/tensor/libtensor/source/copy_for_reshape.cpp @@ -58,8 +58,8 @@ static copy_for_reshape_fn_ptr_t * dst[np.multi_index(i, dst.shape)] = src[np.multi_index(i, src.shape)] */ std::pair -copy_usm_ndarray_for_reshape(dpctl::tensor::usm_ndarray src, - dpctl::tensor::usm_ndarray dst, +copy_usm_ndarray_for_reshape(const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends) { diff --git a/dpctl/tensor/libtensor/source/copy_for_reshape.hpp b/dpctl/tensor/libtensor/source/copy_for_reshape.hpp index 32d41fc159..c78fcf1d86 100644 --- a/dpctl/tensor/libtensor/source/copy_for_reshape.hpp +++ b/dpctl/tensor/libtensor/source/copy_for_reshape.hpp @@ -38,8 +38,8 @@ namespace py_internal { extern std::pair -copy_usm_ndarray_for_reshape(dpctl::tensor::usm_ndarray src, - dpctl::tensor::usm_ndarray dst, +copy_usm_ndarray_for_reshape(const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends = {}); diff --git a/dpctl/tensor/libtensor/source/copy_for_roll.cpp b/dpctl/tensor/libtensor/source/copy_for_roll.cpp index eee129932f..bac8e165dd 100644 --- a/dpctl/tensor/libtensor/source/copy_for_roll.cpp +++ b/dpctl/tensor/libtensor/source/copy_for_roll.cpp @@ -69,8 +69,8 @@ static copy_for_roll_ndshift_strided_fn_ptr_t * dst[np.multi_index(i, dst.shape)] = src[np.multi_index(i, src.shape)] */ std::pair -copy_usm_ndarray_for_roll_1d(dpctl::tensor::usm_ndarray src, - dpctl::tensor::usm_ndarray dst, +copy_usm_ndarray_for_roll_1d(const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, py::ssize_t shift, sycl::queue exec_q, const std::vector &depends) @@ -251,8 +251,8 @@ copy_usm_ndarray_for_roll_1d(dpctl::tensor::usm_ndarray src, } std::pair -copy_usm_ndarray_for_roll_nd(dpctl::tensor::usm_ndarray src, - dpctl::tensor::usm_ndarray dst, +copy_usm_ndarray_for_roll_nd(const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, const std::vector &shifts, sycl::queue exec_q, const std::vector &depends) diff --git a/dpctl/tensor/libtensor/source/copy_for_roll.hpp b/dpctl/tensor/libtensor/source/copy_for_roll.hpp index 0c00710e11..8e8112a6d3 100644 --- a/dpctl/tensor/libtensor/source/copy_for_roll.hpp +++ b/dpctl/tensor/libtensor/source/copy_for_roll.hpp @@ -38,15 +38,15 @@ namespace py_internal { extern std::pair -copy_usm_ndarray_for_roll_1d(dpctl::tensor::usm_ndarray src, - dpctl::tensor::usm_ndarray dst, +copy_usm_ndarray_for_roll_1d(const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, py::ssize_t shift, sycl::queue exec_q, const std::vector &depends = {}); extern std::pair -copy_usm_ndarray_for_roll_nd(dpctl::tensor::usm_ndarray src, - dpctl::tensor::usm_ndarray dst, +copy_usm_ndarray_for_roll_nd(const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, const std::vector &shifts, sycl::queue exec_q, const std::vector &depends = {}); diff --git a/dpctl/tensor/libtensor/source/copy_numpy_ndarray_into_usm_ndarray.cpp b/dpctl/tensor/libtensor/source/copy_numpy_ndarray_into_usm_ndarray.cpp index 0e7bc195e9..50e2f9eed0 100644 --- a/dpctl/tensor/libtensor/source/copy_numpy_ndarray_into_usm_ndarray.cpp +++ b/dpctl/tensor/libtensor/source/copy_numpy_ndarray_into_usm_ndarray.cpp @@ -54,8 +54,8 @@ static copy_and_cast_from_host_blocking_fn_ptr_t [td_ns::num_types]; void copy_numpy_ndarray_into_usm_ndarray( - 
py::array npy_src, - dpctl::tensor::usm_ndarray dst, + const py::array &npy_src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends) { diff --git a/dpctl/tensor/libtensor/source/copy_numpy_ndarray_into_usm_ndarray.hpp b/dpctl/tensor/libtensor/source/copy_numpy_ndarray_into_usm_ndarray.hpp index e5bf513921..b482bec3eb 100644 --- a/dpctl/tensor/libtensor/source/copy_numpy_ndarray_into_usm_ndarray.hpp +++ b/dpctl/tensor/libtensor/source/copy_numpy_ndarray_into_usm_ndarray.hpp @@ -38,8 +38,8 @@ namespace py_internal { extern void copy_numpy_ndarray_into_usm_ndarray( - py::array npy_src, - dpctl::tensor::usm_ndarray dst, + const py::array &npy_src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends = {}); diff --git a/dpctl/tensor/libtensor/source/device_support_queries.cpp b/dpctl/tensor/libtensor/source/device_support_queries.cpp index d04c9c9ed2..9f793cb00a 100644 --- a/dpctl/tensor/libtensor/source/device_support_queries.cpp +++ b/dpctl/tensor/libtensor/source/device_support_queries.cpp @@ -39,7 +39,7 @@ namespace py_internal namespace { -std::string _default_device_fp_type(sycl::device d) +std::string _default_device_fp_type(const sycl::device &d) { if (d.has(sycl::aspect::fp64)) { return "f8"; @@ -49,14 +49,14 @@ std::string _default_device_fp_type(sycl::device d) } } -std::string _default_device_int_type(sycl::device) +std::string _default_device_int_type(const sycl::device &) { return "l"; // code for numpy.dtype('long') to be consistent // with NumPy's default integer type across // platforms. } -std::string _default_device_complex_type(sycl::device d) +std::string _default_device_complex_type(const sycl::device &d) { if (d.has(sycl::aspect::fp64)) { return "c16"; @@ -66,17 +66,17 @@ std::string _default_device_complex_type(sycl::device d) } } -std::string _default_device_bool_type(sycl::device) +std::string _default_device_bool_type(const sycl::device &) { return "b1"; } -std::string _default_device_index_type(sycl::device) +std::string _default_device_index_type(const sycl::device &) { return "i8"; } -sycl::device _extract_device(py::object arg) +sycl::device _extract_device(const py::object &arg) { auto const &api = dpctl::detail::dpctl_capi::get(); @@ -96,31 +96,31 @@ sycl::device _extract_device(py::object arg) } // namespace -std::string default_device_fp_type(py::object arg) +std::string default_device_fp_type(const py::object &arg) { sycl::device d = _extract_device(arg); return _default_device_fp_type(d); } -std::string default_device_int_type(py::object arg) +std::string default_device_int_type(const py::object &arg) { sycl::device d = _extract_device(arg); return _default_device_int_type(d); } -std::string default_device_bool_type(py::object arg) +std::string default_device_bool_type(const py::object &arg) { sycl::device d = _extract_device(arg); return _default_device_bool_type(d); } -std::string default_device_complex_type(py::object arg) +std::string default_device_complex_type(const py::object &arg) { sycl::device d = _extract_device(arg); return _default_device_complex_type(d); } -std::string default_device_index_type(py::object arg) +std::string default_device_index_type(const py::object &arg) { sycl::device d = _extract_device(arg); return _default_device_index_type(d); diff --git a/dpctl/tensor/libtensor/source/device_support_queries.hpp b/dpctl/tensor/libtensor/source/device_support_queries.hpp index 6626b3502a..3367f8bfc2 100644 --- a/dpctl/tensor/libtensor/source/device_support_queries.hpp 
+++ b/dpctl/tensor/libtensor/source/device_support_queries.hpp @@ -37,11 +37,11 @@ namespace tensor namespace py_internal { -extern std::string default_device_fp_type(py::object); -extern std::string default_device_int_type(py::object); -extern std::string default_device_bool_type(py::object); -extern std::string default_device_complex_type(py::object); -extern std::string default_device_index_type(py::object); +extern std::string default_device_fp_type(const py::object &); +extern std::string default_device_int_type(const py::object &); +extern std::string default_device_bool_type(const py::object &); +extern std::string default_device_complex_type(const py::object &); +extern std::string default_device_index_type(const py::object &); } // namespace py_internal } // namespace tensor diff --git a/dpctl/tensor/libtensor/source/elementwise_functions.cpp b/dpctl/tensor/libtensor/source/elementwise_functions.cpp index cc95cecb38..32b8cf630d 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include "elementwise_functions.hpp" #include "utils/type_dispatch.hpp" @@ -2744,16 +2745,17 @@ void init_elementwise_functions(py::module_ m) using impl::abs_output_typeid_vector; using impl::abs_strided_dispatch_vector; - auto abs_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + auto abs_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, exec_q, depends, abs_output_typeid_vector, + src, dst, std::move(exec_q), depends, abs_output_typeid_vector, abs_contig_dispatch_vector, abs_strided_dispatch_vector); }; m.def("_abs", abs_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto abs_result_type_pyapi = [&](py::dtype dtype) { + auto abs_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, abs_output_typeid_vector); }; m.def("_abs_result_type", abs_result_type_pyapi); @@ -2766,16 +2768,17 @@ void init_elementwise_functions(py::module_ m) using impl::acos_output_typeid_vector; using impl::acos_strided_dispatch_vector; - auto acos_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + auto acos_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, exec_q, depends, acos_output_typeid_vector, + src, dst, std::move(exec_q), depends, acos_output_typeid_vector, acos_contig_dispatch_vector, acos_strided_dispatch_vector); }; m.def("_acos", acos_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto acos_result_type_pyapi = [&](py::dtype dtype) { + auto acos_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, acos_output_typeid_vector); }; m.def("_acos_result_type", acos_result_type_pyapi); @@ -2788,16 +2791,18 @@ void init_elementwise_functions(py::module_ m) using impl::acosh_output_typeid_vector; using impl::acosh_strided_dispatch_vector; - auto acosh_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + auto acosh_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue exec_q, const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, acosh_output_typeid_vector, - acosh_contig_dispatch_vector, acosh_strided_dispatch_vector); + return py_unary_ufunc(src, dst, std::move(exec_q), 
depends, + acosh_output_typeid_vector, + acosh_contig_dispatch_vector, + acosh_strided_dispatch_vector); }; m.def("_acosh", acosh_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto acosh_result_type_pyapi = [&](py::dtype dtype) { + auto acosh_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, acosh_output_typeid_vector); }; @@ -2813,12 +2818,14 @@ void init_elementwise_functions(py::module_ m) using impl::add_output_id_table; using impl::add_strided_dispatch_table; - auto add_pyapi = [&](dpctl::tensor::usm_ndarray src1, - dpctl::tensor::usm_ndarray src2, - dpctl::tensor::usm_ndarray dst, sycl::queue exec_q, + auto add_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, add_output_id_table, + src1, src2, dst, std::move(exec_q), depends, + add_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) add_contig_dispatch_table, @@ -2832,7 +2839,8 @@ void init_elementwise_functions(py::module_ m) // c-contig row with broadcasting (may be nullptr) add_contig_row_contig_matrix_broadcast_dispatch_table); }; - auto add_result_type_pyapi = [&](py::dtype dtype1, py::dtype dtype2) { + auto add_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, add_output_id_table); }; @@ -2846,11 +2854,11 @@ void init_elementwise_functions(py::module_ m) using impl::add_inplace_strided_dispatch_table; auto add_inplace_pyapi = - [&](dpctl::tensor::usm_ndarray src, dpctl::tensor::usm_ndarray dst, - sycl::queue exec_q, + [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends = {}) { return py_binary_inplace_ufunc( - src, dst, exec_q, depends, add_output_id_table, + src, dst, std::move(exec_q), depends, add_output_id_table, // function pointers to handle inplace operation on // contiguous arrays (pointers may be nullptr) add_inplace_contig_dispatch_table, @@ -2874,16 +2882,17 @@ void init_elementwise_functions(py::module_ m) using impl::asin_output_typeid_vector; using impl::asin_strided_dispatch_vector; - auto asin_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + auto asin_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, exec_q, depends, asin_output_typeid_vector, + src, dst, std::move(exec_q), depends, asin_output_typeid_vector, asin_contig_dispatch_vector, asin_strided_dispatch_vector); }; m.def("_asin", asin_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto asin_result_type_pyapi = [&](py::dtype dtype) { + auto asin_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, asin_output_typeid_vector); }; m.def("_asin_result_type", asin_result_type_pyapi); @@ -2896,16 +2905,18 @@ void init_elementwise_functions(py::module_ m) using impl::asinh_output_typeid_vector; using impl::asinh_strided_dispatch_vector; - auto asinh_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + auto asinh_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue exec_q, const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, 
depends, asinh_output_typeid_vector, - asinh_contig_dispatch_vector, asinh_strided_dispatch_vector); + return py_unary_ufunc(src, dst, std::move(exec_q), depends, + asinh_output_typeid_vector, + asinh_contig_dispatch_vector, + asinh_strided_dispatch_vector); }; m.def("_asinh", asinh_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto asinh_result_type_pyapi = [&](py::dtype dtype) { + auto asinh_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, asinh_output_typeid_vector); }; @@ -2941,13 +2952,14 @@ void init_elementwise_functions(py::module_ m) using impl::atan2_output_id_table; using impl::atan2_strided_dispatch_table; - auto atan2_pyapi = [&](dpctl::tensor::usm_ndarray src1, - dpctl::tensor::usm_ndarray src2, - dpctl::tensor::usm_ndarray dst, + auto atan2_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, atan2_output_id_table, + src1, src2, dst, std::move(exec_q), depends, + atan2_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) atan2_contig_dispatch_table, @@ -2963,7 +2975,8 @@ void init_elementwise_functions(py::module_ m) td_ns::NullPtrTable< binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto atan2_result_type_pyapi = [&](py::dtype dtype1, py::dtype dtype2) { + auto atan2_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, atan2_output_id_table); }; @@ -2980,16 +2993,18 @@ void init_elementwise_functions(py::module_ m) using impl::atanh_output_typeid_vector; using impl::atanh_strided_dispatch_vector; - auto atanh_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + auto atanh_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue exec_q, const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, atanh_output_typeid_vector, - atanh_contig_dispatch_vector, atanh_strided_dispatch_vector); + return py_unary_ufunc(src, dst, std::move(exec_q), depends, + atanh_output_typeid_vector, + atanh_contig_dispatch_vector, + atanh_strided_dispatch_vector); }; m.def("_atanh", atanh_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto atanh_result_type_pyapi = [&](py::dtype dtype) { + auto atanh_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, atanh_output_typeid_vector); }; @@ -3003,14 +3018,15 @@ void init_elementwise_functions(py::module_ m) using impl::bitwise_and_output_id_table; using impl::bitwise_and_strided_dispatch_table; - auto bitwise_and_pyapi = [&](dpctl::tensor::usm_ndarray src1, - dpctl::tensor::usm_ndarray src2, - dpctl::tensor::usm_ndarray dst, + auto bitwise_and_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, bitwise_and_output_id_table, + src1, src2, dst, std::move(exec_q), depends, + bitwise_and_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) bitwise_and_contig_dispatch_table, @@ -3026,8 +3042,8 @@ void init_elementwise_functions(py::module_ m) 
td_ns::NullPtrTable< binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto bitwise_and_result_type_pyapi = [&](py::dtype dtype1, - py::dtype dtype2) { + auto bitwise_and_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, bitwise_and_output_id_table); }; @@ -3044,14 +3060,17 @@ void init_elementwise_functions(py::module_ m) using impl::bitwise_left_shift_output_id_table; using impl::bitwise_left_shift_strided_dispatch_table; - auto bitwise_left_shift_pyapi = [&](dpctl::tensor::usm_ndarray src1, - dpctl::tensor::usm_ndarray src2, - dpctl::tensor::usm_ndarray dst, + auto bitwise_left_shift_pyapi = [&](const dpctl::tensor::usm_ndarray + &src1, + const dpctl::tensor::usm_ndarray + &src2, + const dpctl::tensor::usm_ndarray + &dst, sycl::queue exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, + src1, src2, dst, std::move(exec_q), depends, bitwise_left_shift_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) @@ -3068,11 +3087,11 @@ void init_elementwise_functions(py::module_ m) td_ns::NullPtrTable< binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto bitwise_left_shift_result_type_pyapi = [&](py::dtype dtype1, - py::dtype dtype2) { - return py_binary_ufunc_result_type( - dtype1, dtype2, bitwise_left_shift_output_id_table); - }; + auto bitwise_left_shift_result_type_pyapi = + [&](const py::dtype &dtype1, const py::dtype &dtype2) { + return py_binary_ufunc_result_type( + dtype1, dtype2, bitwise_left_shift_output_id_table); + }; m.def("_bitwise_left_shift", bitwise_left_shift_pyapi, "", py::arg("src1"), py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); @@ -3087,10 +3106,10 @@ void init_elementwise_functions(py::module_ m) using impl::bitwise_invert_output_typeid_vector; using impl::bitwise_invert_strided_dispatch_vector; - auto bitwise_invert_pyapi = [&](arrayT src, arrayT dst, + auto bitwise_invert_pyapi = [&](const arrayT &src, const arrayT &dst, sycl::queue exec_q, const event_vecT &depends = {}) { - return py_unary_ufunc(src, dst, exec_q, depends, + return py_unary_ufunc(src, dst, std::move(exec_q), depends, bitwise_invert_output_typeid_vector, bitwise_invert_contig_dispatch_vector, bitwise_invert_strided_dispatch_vector); @@ -3099,7 +3118,7 @@ void init_elementwise_functions(py::module_ m) py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto bitwise_invert_result_type_pyapi = [&](py::dtype dtype) { + auto bitwise_invert_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type( dtype, bitwise_invert_output_typeid_vector); }; @@ -3113,14 +3132,15 @@ void init_elementwise_functions(py::module_ m) using impl::bitwise_or_output_id_table; using impl::bitwise_or_strided_dispatch_table; - auto bitwise_or_pyapi = [&](dpctl::tensor::usm_ndarray src1, - dpctl::tensor::usm_ndarray src2, - dpctl::tensor::usm_ndarray dst, + auto bitwise_or_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, bitwise_or_output_id_table, + src1, src2, dst, std::move(exec_q), depends, + bitwise_or_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) 
bitwise_or_contig_dispatch_table, @@ -3136,8 +3156,8 @@ void init_elementwise_functions(py::module_ m) td_ns::NullPtrTable< binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto bitwise_or_result_type_pyapi = [&](py::dtype dtype1, - py::dtype dtype2) { + auto bitwise_or_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, bitwise_or_output_id_table); }; @@ -3154,14 +3174,17 @@ void init_elementwise_functions(py::module_ m) using impl::bitwise_right_shift_output_id_table; using impl::bitwise_right_shift_strided_dispatch_table; - auto bitwise_right_shift_pyapi = [&](dpctl::tensor::usm_ndarray src1, - dpctl::tensor::usm_ndarray src2, - dpctl::tensor::usm_ndarray dst, + auto bitwise_right_shift_pyapi = [&](const dpctl::tensor::usm_ndarray + &src1, + const dpctl::tensor::usm_ndarray + &src2, + const dpctl::tensor::usm_ndarray + &dst, sycl::queue exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, + src1, src2, dst, std::move(exec_q), depends, bitwise_right_shift_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) @@ -3178,11 +3201,11 @@ void init_elementwise_functions(py::module_ m) td_ns::NullPtrTable< binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto bitwise_right_shift_result_type_pyapi = [&](py::dtype dtype1, - py::dtype dtype2) { - return py_binary_ufunc_result_type( - dtype1, dtype2, bitwise_right_shift_output_id_table); - }; + auto bitwise_right_shift_result_type_pyapi = + [&](const py::dtype &dtype1, const py::dtype &dtype2) { + return py_binary_ufunc_result_type( + dtype1, dtype2, bitwise_right_shift_output_id_table); + }; m.def("_bitwise_right_shift", bitwise_right_shift_pyapi, "", py::arg("src1"), py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); @@ -3197,14 +3220,15 @@ void init_elementwise_functions(py::module_ m) using impl::bitwise_xor_output_id_table; using impl::bitwise_xor_strided_dispatch_table; - auto bitwise_xor_pyapi = [&](dpctl::tensor::usm_ndarray src1, - dpctl::tensor::usm_ndarray src2, - dpctl::tensor::usm_ndarray dst, + auto bitwise_xor_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, bitwise_xor_output_id_table, + src1, src2, dst, std::move(exec_q), depends, + bitwise_xor_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) bitwise_xor_contig_dispatch_table, @@ -3220,8 +3244,8 @@ void init_elementwise_functions(py::module_ m) td_ns::NullPtrTable< binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto bitwise_xor_result_type_pyapi = [&](py::dtype dtype1, - py::dtype dtype2) { + auto bitwise_xor_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, bitwise_xor_output_id_table); }; @@ -3238,16 +3262,17 @@ void init_elementwise_functions(py::module_ m) using impl::ceil_output_typeid_vector; using impl::ceil_strided_dispatch_vector; - auto ceil_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + auto ceil_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, exec_q, depends, 
ceil_output_typeid_vector, + src, dst, std::move(exec_q), depends, ceil_output_typeid_vector, ceil_contig_dispatch_vector, ceil_strided_dispatch_vector); }; m.def("_ceil", ceil_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto ceil_result_type_pyapi = [&](py::dtype dtype) { + auto ceil_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, ceil_output_typeid_vector); }; m.def("_ceil_result_type", ceil_result_type_pyapi); @@ -3260,16 +3285,17 @@ void init_elementwise_functions(py::module_ m) using impl::conj_output_typeid_vector; using impl::conj_strided_dispatch_vector; - auto conj_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + auto conj_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, exec_q, depends, conj_output_typeid_vector, + src, dst, std::move(exec_q), depends, conj_output_typeid_vector, conj_contig_dispatch_vector, conj_strided_dispatch_vector); }; m.def("_conj", conj_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto conj_result_type_pyapi = [&](py::dtype dtype) { + auto conj_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, conj_output_typeid_vector); }; m.def("_conj_result_type", conj_result_type_pyapi); @@ -3282,16 +3308,17 @@ void init_elementwise_functions(py::module_ m) using impl::cos_output_typeid_vector; using impl::cos_strided_dispatch_vector; - auto cos_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + auto cos_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, exec_q, depends, cos_output_typeid_vector, + src, dst, std::move(exec_q), depends, cos_output_typeid_vector, cos_contig_dispatch_vector, cos_strided_dispatch_vector); }; m.def("_cos", cos_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto cos_result_type_pyapi = [&](py::dtype dtype) { + auto cos_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, cos_output_typeid_vector); }; m.def("_cos_result_type", cos_result_type_pyapi); @@ -3304,16 +3331,17 @@ void init_elementwise_functions(py::module_ m) using impl::cosh_output_typeid_vector; using impl::cosh_strided_dispatch_vector; - auto cosh_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + auto cosh_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, exec_q, depends, cosh_output_typeid_vector, + src, dst, std::move(exec_q), depends, cosh_output_typeid_vector, cosh_contig_dispatch_vector, cosh_strided_dispatch_vector); }; m.def("_cosh", cosh_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto cosh_result_type_pyapi = [&](py::dtype dtype) { + auto cosh_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, cosh_output_typeid_vector); }; m.def("_cosh_result_type", cosh_result_type_pyapi); @@ -3330,13 +3358,14 @@ void init_elementwise_functions(py::module_ m) using impl::true_divide_output_id_table; using impl::true_divide_strided_dispatch_table; - auto divide_pyapi = [&](dpctl::tensor::usm_ndarray src1, - dpctl::tensor::usm_ndarray src2, - dpctl::tensor::usm_ndarray dst, + auto divide_pyapi = [&](const 
dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, true_divide_output_id_table, + src1, src2, dst, std::move(exec_q), depends, + true_divide_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) true_divide_contig_dispatch_table, @@ -3350,8 +3379,8 @@ void init_elementwise_functions(py::module_ m) // c-contig row with broadcasting (may be nullptr) true_divide_contig_row_contig_matrix_broadcast_dispatch_table); }; - auto divide_result_type_pyapi = [&](py::dtype dtype1, - py::dtype dtype2) { + auto divide_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, true_divide_output_id_table); }; @@ -3368,13 +3397,14 @@ void init_elementwise_functions(py::module_ m) using impl::equal_output_id_table; using impl::equal_strided_dispatch_table; - auto equal_pyapi = [&](dpctl::tensor::usm_ndarray src1, - dpctl::tensor::usm_ndarray src2, - dpctl::tensor::usm_ndarray dst, + auto equal_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, equal_output_id_table, + src1, src2, dst, std::move(exec_q), depends, + equal_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) equal_contig_dispatch_table, @@ -3390,7 +3420,8 @@ void init_elementwise_functions(py::module_ m) td_ns::NullPtrTable< binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto equal_result_type_pyapi = [&](py::dtype dtype1, py::dtype dtype2) { + auto equal_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, equal_output_id_table); }; @@ -3407,16 +3438,17 @@ void init_elementwise_functions(py::module_ m) using impl::exp_output_typeid_vector; using impl::exp_strided_dispatch_vector; - auto exp_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + auto exp_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, exec_q, depends, exp_output_typeid_vector, + src, dst, std::move(exec_q), depends, exp_output_typeid_vector, exp_contig_dispatch_vector, exp_strided_dispatch_vector); }; m.def("_exp", exp_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto exp_result_type_pyapi = [&](py::dtype dtype) { + auto exp_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, exp_output_typeid_vector); }; m.def("_exp_result_type", exp_result_type_pyapi); @@ -3429,16 +3461,18 @@ void init_elementwise_functions(py::module_ m) using impl::expm1_output_typeid_vector; using impl::expm1_strided_dispatch_vector; - auto expm1_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + auto expm1_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue exec_q, const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, expm1_output_typeid_vector, - expm1_contig_dispatch_vector, expm1_strided_dispatch_vector); + return py_unary_ufunc(src, dst, std::move(exec_q), depends, + expm1_output_typeid_vector, + 
expm1_contig_dispatch_vector, + expm1_strided_dispatch_vector); }; m.def("_expm1", expm1_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto expm1_result_type_pyapi = [&](py::dtype dtype) { + auto expm1_result_type_pyapi = [&](const py::dtype dtype) { return py_unary_ufunc_result_type(dtype, expm1_output_typeid_vector); }; @@ -3452,16 +3486,18 @@ void init_elementwise_functions(py::module_ m) using impl::floor_output_typeid_vector; using impl::floor_strided_dispatch_vector; - auto floor_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + auto floor_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue exec_q, const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, floor_output_typeid_vector, - floor_contig_dispatch_vector, floor_strided_dispatch_vector); + return py_unary_ufunc(src, dst, std::move(exec_q), depends, + floor_output_typeid_vector, + floor_contig_dispatch_vector, + floor_strided_dispatch_vector); }; m.def("_floor", floor_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto floor_result_type_pyapi = [&](py::dtype dtype) { + auto floor_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, floor_output_typeid_vector); }; @@ -3475,14 +3511,15 @@ void init_elementwise_functions(py::module_ m) using impl::floor_divide_output_id_table; using impl::floor_divide_strided_dispatch_table; - auto floor_divide_pyapi = [&](dpctl::tensor::usm_ndarray src1, - dpctl::tensor::usm_ndarray src2, - dpctl::tensor::usm_ndarray dst, + auto floor_divide_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, floor_divide_output_id_table, + src1, src2, dst, std::move(exec_q), depends, + floor_divide_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) floor_divide_contig_dispatch_table, @@ -3498,8 +3535,8 @@ void init_elementwise_functions(py::module_ m) td_ns::NullPtrTable< binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto floor_divide_result_type_pyapi = [&](py::dtype dtype1, - py::dtype dtype2) { + auto floor_divide_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, floor_divide_output_id_table); }; @@ -3516,13 +3553,14 @@ void init_elementwise_functions(py::module_ m) using impl::greater_output_id_table; using impl::greater_strided_dispatch_table; - auto greater_pyapi = [&](dpctl::tensor::usm_ndarray src1, - dpctl::tensor::usm_ndarray src2, - dpctl::tensor::usm_ndarray dst, + auto greater_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, greater_output_id_table, + src1, src2, dst, std::move(exec_q), depends, + greater_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) greater_contig_dispatch_table, @@ -3538,8 +3576,8 @@ void init_elementwise_functions(py::module_ m) td_ns::NullPtrTable< binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto greater_result_type_pyapi = [&](py::dtype dtype1, - 
py::dtype dtype2) { + auto greater_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, greater_output_id_table); }; @@ -3556,14 +3594,15 @@ void init_elementwise_functions(py::module_ m) using impl::greater_equal_output_id_table; using impl::greater_equal_strided_dispatch_table; - auto greater_equal_pyapi = [&](dpctl::tensor::usm_ndarray src1, - dpctl::tensor::usm_ndarray src2, - dpctl::tensor::usm_ndarray dst, + auto greater_equal_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, greater_equal_output_id_table, + src1, src2, dst, std::move(exec_q), depends, + greater_equal_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) greater_equal_contig_dispatch_table, @@ -3579,8 +3618,8 @@ void init_elementwise_functions(py::module_ m) td_ns::NullPtrTable< binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto greater_equal_result_type_pyapi = [&](py::dtype dtype1, - py::dtype dtype2) { + auto greater_equal_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, greater_equal_output_id_table); }; @@ -3598,16 +3637,17 @@ void init_elementwise_functions(py::module_ m) using impl::imag_output_typeid_vector; using impl::imag_strided_dispatch_vector; - auto imag_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + auto imag_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, exec_q, depends, imag_output_typeid_vector, + src, dst, std::move(exec_q), depends, imag_output_typeid_vector, imag_contig_dispatch_vector, imag_strided_dispatch_vector); }; m.def("_imag", imag_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto imag_result_type_pyapi = [&](py::dtype dtype) { + auto imag_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, imag_output_typeid_vector); }; m.def("_imag_result_type", imag_result_type_pyapi); @@ -3621,15 +3661,15 @@ void init_elementwise_functions(py::module_ m) using impl::isfinite_output_typeid_vector; using impl::isfinite_strided_dispatch_vector; auto isfinite_pyapi = - [&](dpctl::tensor::usm_ndarray src, dpctl::tensor::usm_ndarray dst, - sycl::queue exec_q, + [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends = {}) { - return py_unary_ufunc(src, dst, exec_q, depends, + return py_unary_ufunc(src, dst, std::move(exec_q), depends, isfinite_output_typeid_vector, isfinite_contig_dispatch_vector, isfinite_strided_dispatch_vector); }; - auto isfinite_result_type_pyapi = [&](py::dtype dtype) { + auto isfinite_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, isfinite_output_typeid_vector); }; @@ -3645,15 +3685,16 @@ void init_elementwise_functions(py::module_ m) using impl::isinf_contig_dispatch_vector; using impl::isinf_output_typeid_vector; using impl::isinf_strided_dispatch_vector; - auto isinf_pyapi = [&](dpctl::tensor::usm_ndarray src, - dpctl::tensor::usm_ndarray dst, + auto isinf_pyapi = [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray 
&dst, sycl::queue exec_q, const std::vector &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, isinf_output_typeid_vector, - isinf_contig_dispatch_vector, isinf_strided_dispatch_vector); + return py_unary_ufunc(src, dst, std::move(exec_q), depends, + isinf_output_typeid_vector, + isinf_contig_dispatch_vector, + isinf_strided_dispatch_vector); }; - auto isinf_result_type_pyapi = [&](py::dtype dtype) { + auto isinf_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, isinf_output_typeid_vector); }; @@ -3669,15 +3710,16 @@ void init_elementwise_functions(py::module_ m) using impl::isnan_contig_dispatch_vector; using impl::isnan_output_typeid_vector; using impl::isnan_strided_dispatch_vector; - auto isnan_pyapi = [&](dpctl::tensor::usm_ndarray src, - dpctl::tensor::usm_ndarray dst, + auto isnan_pyapi = [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, isnan_output_typeid_vector, - isnan_contig_dispatch_vector, isnan_strided_dispatch_vector); + return py_unary_ufunc(src, dst, std::move(exec_q), depends, + isnan_output_typeid_vector, + isnan_contig_dispatch_vector, + isnan_strided_dispatch_vector); }; - auto isnan_result_type_pyapi = [&](py::dtype dtype) { + auto isnan_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, isnan_output_typeid_vector); }; @@ -3693,13 +3735,14 @@ void init_elementwise_functions(py::module_ m) using impl::less_output_id_table; using impl::less_strided_dispatch_table; - auto less_pyapi = [&](dpctl::tensor::usm_ndarray src1, - dpctl::tensor::usm_ndarray src2, - dpctl::tensor::usm_ndarray dst, + auto less_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, less_output_id_table, + src1, src2, dst, std::move(exec_q), depends, + less_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) less_contig_dispatch_table, @@ -3715,7 +3758,8 @@ void init_elementwise_functions(py::module_ m) td_ns::NullPtrTable< binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto less_result_type_pyapi = [&](py::dtype dtype1, py::dtype dtype2) { + auto less_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, less_output_id_table); }; @@ -3732,14 +3776,15 @@ void init_elementwise_functions(py::module_ m) using impl::less_equal_output_id_table; using impl::less_equal_strided_dispatch_table; - auto less_equal_pyapi = [&](dpctl::tensor::usm_ndarray src1, - dpctl::tensor::usm_ndarray src2, - dpctl::tensor::usm_ndarray dst, + auto less_equal_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, less_equal_output_id_table, + src1, src2, dst, std::move(exec_q), depends, + less_equal_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) less_equal_contig_dispatch_table, @@ -3755,8 +3800,8 @@ void init_elementwise_functions(py::module_ m) td_ns::NullPtrTable< 
binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto less_equal_result_type_pyapi = [&](py::dtype dtype1, - py::dtype dtype2) { + auto less_equal_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, less_equal_output_id_table); }; @@ -3773,16 +3818,17 @@ void init_elementwise_functions(py::module_ m) using impl::log_output_typeid_vector; using impl::log_strided_dispatch_vector; - auto log_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + auto log_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, exec_q, depends, log_output_typeid_vector, + src, dst, std::move(exec_q), depends, log_output_typeid_vector, log_contig_dispatch_vector, log_strided_dispatch_vector); }; m.def("_log", log_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto log_result_type_pyapi = [&](py::dtype dtype) { + auto log_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, log_output_typeid_vector); }; m.def("_log_result_type", log_result_type_pyapi); @@ -3795,16 +3841,18 @@ void init_elementwise_functions(py::module_ m) using impl::log1p_output_typeid_vector; using impl::log1p_strided_dispatch_vector; - auto log1p_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + auto log1p_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue exec_q, const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, log1p_output_typeid_vector, - log1p_contig_dispatch_vector, log1p_strided_dispatch_vector); + return py_unary_ufunc(src, dst, std::move(exec_q), depends, + log1p_output_typeid_vector, + log1p_contig_dispatch_vector, + log1p_strided_dispatch_vector); }; m.def("_log1p", log1p_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto log1p_result_type_pyapi = [&](py::dtype dtype) { + auto log1p_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, log1p_output_typeid_vector); }; @@ -3818,15 +3866,15 @@ void init_elementwise_functions(py::module_ m) using impl::log2_contig_dispatch_vector; using impl::log2_output_typeid_vector; using impl::log2_strided_dispatch_vector; - auto log2_pyapi = [&](dpctl::tensor::usm_ndarray src, - dpctl::tensor::usm_ndarray dst, + auto log2_pyapi = [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends = {}) { return py_unary_ufunc( - src, dst, exec_q, depends, log2_output_typeid_vector, + src, dst, std::move(exec_q), depends, log2_output_typeid_vector, log2_contig_dispatch_vector, log2_strided_dispatch_vector); }; - auto log2_result_type_pyapi = [&](py::dtype dtype) { + auto log2_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, log2_output_typeid_vector); }; m.def("_log2", log2_pyapi, "", py::arg("src"), py::arg("dst"), @@ -3841,15 +3889,16 @@ void init_elementwise_functions(py::module_ m) using impl::log10_contig_dispatch_vector; using impl::log10_output_typeid_vector; using impl::log10_strided_dispatch_vector; - auto log10_pyapi = [&](dpctl::tensor::usm_ndarray src, - dpctl::tensor::usm_ndarray dst, + auto log10_pyapi = [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends = {}) { - return 
py_unary_ufunc( - src, dst, exec_q, depends, log10_output_typeid_vector, - log10_contig_dispatch_vector, log10_strided_dispatch_vector); + return py_unary_ufunc(src, dst, std::move(exec_q), depends, + log10_output_typeid_vector, + log10_contig_dispatch_vector, + log10_strided_dispatch_vector); }; - auto log10_result_type_pyapi = [&](py::dtype dtype) { + auto log10_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, log10_output_typeid_vector); }; @@ -3865,14 +3914,15 @@ void init_elementwise_functions(py::module_ m) using impl::logaddexp_output_id_table; using impl::logaddexp_strided_dispatch_table; - auto logaddexp_pyapi = [&](dpctl::tensor::usm_ndarray src1, - dpctl::tensor::usm_ndarray src2, - dpctl::tensor::usm_ndarray dst, + auto logaddexp_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, logaddexp_output_id_table, + src1, src2, dst, std::move(exec_q), depends, + logaddexp_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) logaddexp_contig_dispatch_table, @@ -3888,8 +3938,8 @@ void init_elementwise_functions(py::module_ m) td_ns::NullPtrTable< binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto logaddexp_result_type_pyapi = [&](py::dtype dtype1, - py::dtype dtype2) { + auto logaddexp_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, logaddexp_output_id_table); }; @@ -3906,14 +3956,15 @@ void init_elementwise_functions(py::module_ m) using impl::logical_and_output_id_table; using impl::logical_and_strided_dispatch_table; - auto logical_and_pyapi = [&](dpctl::tensor::usm_ndarray src1, - dpctl::tensor::usm_ndarray src2, - dpctl::tensor::usm_ndarray dst, + auto logical_and_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, logical_and_output_id_table, + src1, src2, dst, std::move(exec_q), depends, + logical_and_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) logical_and_contig_dispatch_table, @@ -3929,8 +3980,8 @@ void init_elementwise_functions(py::module_ m) td_ns::NullPtrTable< binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto logical_and_result_type_pyapi = [&](py::dtype dtype1, - py::dtype dtype2) { + auto logical_and_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, logical_and_output_id_table); }; @@ -3947,9 +3998,10 @@ void init_elementwise_functions(py::module_ m) using impl::logical_not_output_typeid_vector; using impl::logical_not_strided_dispatch_vector; - auto logical_not_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + auto logical_not_pyapi = [&](const arrayT &src, arrayT dst, + sycl::queue exec_q, const event_vecT &depends = {}) { - return py_unary_ufunc(src, dst, exec_q, depends, + return py_unary_ufunc(src, dst, std::move(exec_q), depends, logical_not_output_typeid_vector, logical_not_contig_dispatch_vector, logical_not_strided_dispatch_vector); @@ -3958,7 +4010,7 @@ void init_elementwise_functions(py::module_ m) 
py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto logical_not_result_type_pyapi = [&](py::dtype dtype) { + auto logical_not_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, logical_not_output_typeid_vector); }; @@ -3972,14 +4024,15 @@ void init_elementwise_functions(py::module_ m) using impl::logical_or_output_id_table; using impl::logical_or_strided_dispatch_table; - auto logical_or_pyapi = [&](dpctl::tensor::usm_ndarray src1, - dpctl::tensor::usm_ndarray src2, - dpctl::tensor::usm_ndarray dst, + auto logical_or_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, logical_or_output_id_table, + src1, src2, dst, std::move(exec_q), depends, + logical_or_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) logical_or_contig_dispatch_table, @@ -3995,8 +4048,8 @@ void init_elementwise_functions(py::module_ m) td_ns::NullPtrTable< binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto logical_or_result_type_pyapi = [&](py::dtype dtype1, - py::dtype dtype2) { + auto logical_or_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, logical_or_output_id_table); }; @@ -4013,14 +4066,15 @@ void init_elementwise_functions(py::module_ m) using impl::logical_xor_output_id_table; using impl::logical_xor_strided_dispatch_table; - auto logical_xor_pyapi = [&](dpctl::tensor::usm_ndarray src1, - dpctl::tensor::usm_ndarray src2, - dpctl::tensor::usm_ndarray dst, + auto logical_xor_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, logical_xor_output_id_table, + src1, src2, dst, std::move(exec_q), depends, + logical_xor_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) logical_xor_contig_dispatch_table, @@ -4036,8 +4090,8 @@ void init_elementwise_functions(py::module_ m) td_ns::NullPtrTable< binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto logical_xor_result_type_pyapi = [&](py::dtype dtype1, - py::dtype dtype2) { + auto logical_xor_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, logical_xor_output_id_table); }; @@ -4054,13 +4108,14 @@ void init_elementwise_functions(py::module_ m) using impl::maximum_output_id_table; using impl::maximum_strided_dispatch_table; - auto maximum_pyapi = [&](dpctl::tensor::usm_ndarray src1, - dpctl::tensor::usm_ndarray src2, - dpctl::tensor::usm_ndarray dst, + auto maximum_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, maximum_output_id_table, + src1, src2, dst, std::move(exec_q), depends, + maximum_output_id_table, // function pointers to handle operation on contiguous // arrays (pointers may be nullptr) maximum_contig_dispatch_table, @@ -4076,8 +4131,8 @@ void init_elementwise_functions(py::module_ m) 
td_ns::NullPtrTable< binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto maximum_result_type_pyapi = [&](py::dtype dtype1, - py::dtype dtype2) { + auto maximum_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, maximum_output_id_table); }; @@ -4094,13 +4149,14 @@ void init_elementwise_functions(py::module_ m) using impl::minimum_output_id_table; using impl::minimum_strided_dispatch_table; - auto minimum_pyapi = [&](dpctl::tensor::usm_ndarray src1, - dpctl::tensor::usm_ndarray src2, - dpctl::tensor::usm_ndarray dst, + auto minimum_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, minimum_output_id_table, + src1, src2, dst, std::move(exec_q), depends, + minimum_output_id_table, // function pointers to handle operation on contiguous // arrays (pointers may be nullptr) minimum_contig_dispatch_table, @@ -4116,8 +4172,8 @@ void init_elementwise_functions(py::module_ m) td_ns::NullPtrTable< binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto minimum_result_type_pyapi = [&](py::dtype dtype1, - py::dtype dtype2) { + auto minimum_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, minimum_output_id_table); }; @@ -4137,12 +4193,13 @@ void init_elementwise_functions(py::module_ m) using impl::multiply_strided_dispatch_table; auto multiply_pyapi = - [&](dpctl::tensor::usm_ndarray src1, - dpctl::tensor::usm_ndarray src2, dpctl::tensor::usm_ndarray dst, - sycl::queue exec_q, + [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, multiply_output_id_table, + src1, src2, dst, std::move(exec_q), depends, + multiply_output_id_table, // function pointers to handle operation on contiguous // arrays (pointers may be nullptr) multiply_contig_dispatch_table, @@ -4156,8 +4213,8 @@ void init_elementwise_functions(py::module_ m) // and c-contig row with broadcasting (may be nullptr) multiply_contig_row_contig_matrix_broadcast_dispatch_table); }; - auto multiply_result_type_pyapi = [&](py::dtype dtype1, - py::dtype dtype2) { + auto multiply_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, multiply_output_id_table); }; @@ -4171,11 +4228,12 @@ void init_elementwise_functions(py::module_ m) using impl::multiply_inplace_strided_dispatch_table; auto multiply_inplace_pyapi = - [&](dpctl::tensor::usm_ndarray src, dpctl::tensor::usm_ndarray dst, - sycl::queue exec_q, + [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends = {}) { return py_binary_inplace_ufunc( - src, dst, exec_q, depends, multiply_output_id_table, + src, dst, std::move(exec_q), depends, + multiply_output_id_table, // function pointers to handle inplace operation on // contiguous arrays (pointers may be nullptr) multiply_inplace_contig_dispatch_table, @@ -4199,9 +4257,10 @@ void init_elementwise_functions(py::module_ m) using impl::negative_output_typeid_vector; using impl::negative_strided_dispatch_vector; - auto negative_pyapi = 
[&](arrayT src, arrayT dst, sycl::queue exec_q, + auto negative_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue exec_q, const event_vecT &depends = {}) { - return py_unary_ufunc(src, dst, exec_q, depends, + return py_unary_ufunc(src, dst, std::move(exec_q), depends, negative_output_typeid_vector, negative_contig_dispatch_vector, negative_strided_dispatch_vector); @@ -4209,7 +4268,7 @@ void init_elementwise_functions(py::module_ m) m.def("_negative", negative_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto negative_result_type_pyapi = [&](py::dtype dtype) { + auto negative_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, negative_output_typeid_vector); }; @@ -4223,14 +4282,15 @@ void init_elementwise_functions(py::module_ m) using impl::not_equal_output_id_table; using impl::not_equal_strided_dispatch_table; - auto not_equal_pyapi = [&](dpctl::tensor::usm_ndarray src1, - dpctl::tensor::usm_ndarray src2, - dpctl::tensor::usm_ndarray dst, + auto not_equal_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, not_equal_output_id_table, + src1, src2, dst, std::move(exec_q), depends, + not_equal_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) not_equal_contig_dispatch_table, @@ -4246,8 +4306,8 @@ void init_elementwise_functions(py::module_ m) td_ns::NullPtrTable< binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto not_equal_result_type_pyapi = [&](py::dtype dtype1, - py::dtype dtype2) { + auto not_equal_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, not_equal_output_id_table); }; @@ -4264,9 +4324,10 @@ void init_elementwise_functions(py::module_ m) using impl::positive_output_typeid_vector; using impl::positive_strided_dispatch_vector; - auto positive_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + auto positive_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue exec_q, const event_vecT &depends = {}) { - return py_unary_ufunc(src, dst, exec_q, depends, + return py_unary_ufunc(src, dst, std::move(exec_q), depends, positive_output_typeid_vector, positive_contig_dispatch_vector, positive_strided_dispatch_vector); @@ -4274,7 +4335,7 @@ void init_elementwise_functions(py::module_ m) m.def("_positive", positive_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto positive_result_type_pyapi = [&](py::dtype dtype) { + auto positive_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, positive_output_typeid_vector); }; @@ -4288,12 +4349,14 @@ void init_elementwise_functions(py::module_ m) using impl::pow_output_id_table; using impl::pow_strided_dispatch_table; - auto pow_pyapi = [&](dpctl::tensor::usm_ndarray src1, - dpctl::tensor::usm_ndarray src2, - dpctl::tensor::usm_ndarray dst, sycl::queue exec_q, + auto pow_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, pow_output_id_table, + src1, src2, dst, std::move(exec_q), 
depends, + pow_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) pow_contig_dispatch_table, @@ -4309,7 +4372,8 @@ void init_elementwise_functions(py::module_ m) td_ns::NullPtrTable< binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto pow_result_type_pyapi = [&](py::dtype dtype1, py::dtype dtype2) { + auto pow_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, pow_output_id_table); }; @@ -4326,16 +4390,17 @@ void init_elementwise_functions(py::module_ m) using impl::proj_output_typeid_vector; using impl::proj_strided_dispatch_vector; - auto proj_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + auto proj_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, exec_q, depends, proj_output_typeid_vector, + src, dst, std::move(exec_q), depends, proj_output_typeid_vector, proj_contig_dispatch_vector, proj_strided_dispatch_vector); }; m.def("_proj", proj_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto proj_result_type_pyapi = [&](py::dtype dtype) { + auto proj_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, proj_output_typeid_vector); }; m.def("_proj_result_type", proj_result_type_pyapi); @@ -4348,16 +4413,17 @@ void init_elementwise_functions(py::module_ m) using impl::real_output_typeid_vector; using impl::real_strided_dispatch_vector; - auto real_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + auto real_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, exec_q, depends, real_output_typeid_vector, + src, dst, std::move(exec_q), depends, real_output_typeid_vector, real_contig_dispatch_vector, real_strided_dispatch_vector); }; m.def("_real", real_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto real_result_type_pyapi = [&](py::dtype dtype) { + auto real_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, real_output_typeid_vector); }; m.def("_real_result_type", real_result_type_pyapi); @@ -4370,14 +4436,15 @@ void init_elementwise_functions(py::module_ m) using impl::remainder_output_id_table; using impl::remainder_strided_dispatch_table; - auto remainder_pyapi = [&](dpctl::tensor::usm_ndarray src1, - dpctl::tensor::usm_ndarray src2, - dpctl::tensor::usm_ndarray dst, + auto remainder_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, remainder_output_id_table, + src1, src2, dst, std::move(exec_q), depends, + remainder_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) remainder_contig_dispatch_table, @@ -4393,8 +4460,8 @@ void init_elementwise_functions(py::module_ m) td_ns::NullPtrTable< binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto remainder_result_type_pyapi = [&](py::dtype dtype1, - py::dtype dtype2) { + auto remainder_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, remainder_output_id_table); }; @@ 
-4411,16 +4478,18 @@ void init_elementwise_functions(py::module_ m) using impl::round_output_typeid_vector; using impl::round_strided_dispatch_vector; - auto round_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + auto round_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue exec_q, const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, round_output_typeid_vector, - round_contig_dispatch_vector, round_strided_dispatch_vector); + return py_unary_ufunc(src, dst, std::move(exec_q), depends, + round_output_typeid_vector, + round_contig_dispatch_vector, + round_strided_dispatch_vector); }; m.def("_round", round_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto round_result_type_pyapi = [&](py::dtype dtype) { + auto round_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, round_output_typeid_vector); }; @@ -4434,16 +4503,17 @@ void init_elementwise_functions(py::module_ m) using impl::sign_output_typeid_vector; using impl::sign_strided_dispatch_vector; - auto sign_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + auto sign_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, exec_q, depends, sign_output_typeid_vector, + src, dst, std::move(exec_q), depends, sign_output_typeid_vector, sign_contig_dispatch_vector, sign_strided_dispatch_vector); }; m.def("_sign", sign_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto sign_result_type_pyapi = [&](py::dtype dtype) { + auto sign_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, sign_output_typeid_vector); }; m.def("_sign_result_type", sign_result_type_pyapi); @@ -4456,9 +4526,10 @@ void init_elementwise_functions(py::module_ m) using impl::signbit_output_typeid_vector; using impl::signbit_strided_dispatch_vector; - auto signbit_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + auto signbit_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue exec_q, const event_vecT &depends = {}) { - return py_unary_ufunc(src, dst, exec_q, depends, + return py_unary_ufunc(src, dst, std::move(exec_q), depends, signbit_output_typeid_vector, signbit_contig_dispatch_vector, signbit_strided_dispatch_vector); @@ -4466,7 +4537,7 @@ void init_elementwise_functions(py::module_ m) m.def("_signbit", signbit_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto signbit_result_type_pyapi = [&](py::dtype dtype) { + auto signbit_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, signbit_output_typeid_vector); }; @@ -4480,16 +4551,17 @@ void init_elementwise_functions(py::module_ m) using impl::sin_output_typeid_vector; using impl::sin_strided_dispatch_vector; - auto sin_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + auto sin_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, exec_q, depends, sin_output_typeid_vector, + src, dst, std::move(exec_q), depends, sin_output_typeid_vector, sin_contig_dispatch_vector, sin_strided_dispatch_vector); }; m.def("_sin", sin_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto sin_result_type_pyapi = [&](py::dtype dtype) { + auto 
sin_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, sin_output_typeid_vector); }; m.def("_sin_result_type", sin_result_type_pyapi); @@ -4501,16 +4573,17 @@ void init_elementwise_functions(py::module_ m) using impl::sinh_output_typeid_vector; using impl::sinh_strided_dispatch_vector; - auto sinh_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + auto sinh_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, exec_q, depends, sinh_output_typeid_vector, + src, dst, std::move(exec_q), depends, sinh_output_typeid_vector, sinh_contig_dispatch_vector, sinh_strided_dispatch_vector); }; m.def("_sinh", sinh_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto sinh_result_type_pyapi = [&](py::dtype dtype) { + auto sinh_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, sinh_output_typeid_vector); }; m.def("_sinh_result_type", sinh_result_type_pyapi); @@ -4523,16 +4596,18 @@ void init_elementwise_functions(py::module_ m) using impl::square_output_typeid_vector; using impl::square_strided_dispatch_vector; - auto square_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + auto square_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue exec_q, const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, square_output_typeid_vector, - square_contig_dispatch_vector, square_strided_dispatch_vector); + return py_unary_ufunc(src, dst, std::move(exec_q), depends, + square_output_typeid_vector, + square_contig_dispatch_vector, + square_strided_dispatch_vector); }; m.def("_square", square_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto square_result_type_pyapi = [&](py::dtype dtype) { + auto square_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, square_output_typeid_vector); }; @@ -4546,16 +4621,17 @@ void init_elementwise_functions(py::module_ m) using impl::sqrt_output_typeid_vector; using impl::sqrt_strided_dispatch_vector; - auto sqrt_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + auto sqrt_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, exec_q, depends, sqrt_output_typeid_vector, + src, dst, std::move(exec_q), depends, sqrt_output_typeid_vector, sqrt_contig_dispatch_vector, sqrt_strided_dispatch_vector); }; m.def("_sqrt", sqrt_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto sqrt_result_type_pyapi = [&](py::dtype dtype) { + auto sqrt_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, sqrt_output_typeid_vector); }; m.def("_sqrt_result_type", sqrt_result_type_pyapi); @@ -4571,12 +4647,13 @@ void init_elementwise_functions(py::module_ m) using impl::subtract_strided_dispatch_table; auto subtract_pyapi = - [&](dpctl::tensor::usm_ndarray src1, - dpctl::tensor::usm_ndarray src2, dpctl::tensor::usm_ndarray dst, - sycl::queue exec_q, + [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, subtract_output_id_table, + src1, src2, dst, std::move(exec_q), depends, + 
subtract_output_id_table, // function pointers to handle operation on contiguous // arrays (pointers may be nullptr) subtract_contig_dispatch_table, @@ -4590,8 +4667,8 @@ void init_elementwise_functions(py::module_ m) // and c-contig row with broadcasting (may be nullptr) subtract_contig_row_contig_matrix_broadcast_dispatch_table); }; - auto subtract_result_type_pyapi = [&](py::dtype dtype1, - py::dtype dtype2) { + auto subtract_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, subtract_output_id_table); }; @@ -4605,11 +4682,12 @@ void init_elementwise_functions(py::module_ m) using impl::subtract_inplace_strided_dispatch_table; auto subtract_inplace_pyapi = - [&](dpctl::tensor::usm_ndarray src, dpctl::tensor::usm_ndarray dst, - sycl::queue exec_q, + [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends = {}) { return py_binary_inplace_ufunc( - src, dst, exec_q, depends, subtract_output_id_table, + src, dst, std::move(exec_q), depends, + subtract_output_id_table, // function pointers to handle inplace operation on // contiguous arrays (pointers may be nullptr) subtract_inplace_contig_dispatch_table, @@ -4633,16 +4711,17 @@ void init_elementwise_functions(py::module_ m) using impl::tan_output_typeid_vector; using impl::tan_strided_dispatch_vector; - auto tan_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + auto tan_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, exec_q, depends, tan_output_typeid_vector, + src, dst, std::move(exec_q), depends, tan_output_typeid_vector, tan_contig_dispatch_vector, tan_strided_dispatch_vector); }; m.def("_tan", tan_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto tan_result_type_pyapi = [&](py::dtype dtype) { + auto tan_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, tan_output_typeid_vector); }; m.def("_tan_result_type", tan_result_type_pyapi); @@ -4655,16 +4734,17 @@ void init_elementwise_functions(py::module_ m) using impl::tanh_output_typeid_vector; using impl::tanh_strided_dispatch_vector; - auto tanh_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + auto tanh_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, exec_q, depends, tanh_output_typeid_vector, + src, dst, std::move(exec_q), depends, tanh_output_typeid_vector, tanh_contig_dispatch_vector, tanh_strided_dispatch_vector); }; m.def("_tanh", tanh_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto tanh_result_type_pyapi = [&](py::dtype dtype) { + auto tanh_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, tanh_output_typeid_vector); }; m.def("_tanh_result_type", tanh_result_type_pyapi); @@ -4677,16 +4757,18 @@ void init_elementwise_functions(py::module_ m) using impl::trunc_output_typeid_vector; using impl::trunc_strided_dispatch_vector; - auto trunc_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + auto trunc_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue exec_q, const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, trunc_output_typeid_vector, - trunc_contig_dispatch_vector, 
trunc_strided_dispatch_vector); + return py_unary_ufunc(src, dst, std::move(exec_q), depends, + trunc_output_typeid_vector, + trunc_contig_dispatch_vector, + trunc_strided_dispatch_vector); }; m.def("_trunc", trunc_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto trunc_result_type_pyapi = [&](py::dtype dtype) { + auto trunc_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, trunc_output_typeid_vector); }; @@ -4700,13 +4782,14 @@ void init_elementwise_functions(py::module_ m) using impl::hypot_output_id_table; using impl::hypot_strided_dispatch_table; - auto hypot_pyapi = [&](dpctl::tensor::usm_ndarray src1, - dpctl::tensor::usm_ndarray src2, - dpctl::tensor::usm_ndarray dst, + auto hypot_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, hypot_output_id_table, + src1, src2, dst, std::move(exec_q), depends, + hypot_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) hypot_contig_dispatch_table, @@ -4722,7 +4805,8 @@ void init_elementwise_functions(py::module_ m) td_ns::NullPtrTable< binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto hypot_result_type_pyapi = [&](py::dtype dtype1, py::dtype dtype2) { + auto hypot_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, hypot_output_id_table); }; diff --git a/dpctl/tensor/libtensor/source/elementwise_functions.hpp b/dpctl/tensor/libtensor/source/elementwise_functions.hpp index 5c4b50bbc0..58704d83de 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions.hpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions.hpp @@ -54,8 +54,8 @@ template std::pair -py_unary_ufunc(dpctl::tensor::usm_ndarray src, - dpctl::tensor::usm_ndarray dst, +py_unary_ufunc(const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue q, const std::vector &depends, // @@ -252,7 +252,7 @@ py_unary_ufunc(dpctl::tensor::usm_ndarray src, } template -py::object py_unary_ufunc_result_type(py::dtype input_dtype, +py::object py_unary_ufunc_result_type(const py::dtype &input_dtype, const output_typesT &output_types) { int tn = input_dtype.num(); // NumPy type numbers are the same as in dpctl @@ -298,9 +298,9 @@ template std::pair py_binary_ufunc( - dpctl::tensor::usm_ndarray src1, - dpctl::tensor::usm_ndarray src2, - dpctl::tensor::usm_ndarray dst, // dst = op(src1, src2), elementwise + const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, // dst = op(src1, src2), elementwise sycl::queue exec_q, const std::vector depends, // @@ -560,8 +560,8 @@ std::pair py_binary_ufunc( } template -py::object py_binary_ufunc_result_type(py::dtype input1_dtype, - py::dtype input2_dtype, +py::object py_binary_ufunc_result_type(const py::dtype &input1_dtype, + const py::dtype &input2_dtype, const output_typesT &output_types_table) { int tn1 = input1_dtype.num(); // NumPy type numbers are the same as in dpctl @@ -605,8 +605,8 @@ template std::pair -py_binary_inplace_ufunc(dpctl::tensor::usm_ndarray lhs, - dpctl::tensor::usm_ndarray rhs, +py_binary_inplace_ufunc(const dpctl::tensor::usm_ndarray &lhs, + const dpctl::tensor::usm_ndarray &rhs, sycl::queue 
exec_q, const std::vector depends, // diff --git a/dpctl/tensor/libtensor/source/eye_ctor.cpp b/dpctl/tensor/libtensor/source/eye_ctor.cpp index c4a8f0cd08..85e28cf87f 100644 --- a/dpctl/tensor/libtensor/source/eye_ctor.cpp +++ b/dpctl/tensor/libtensor/source/eye_ctor.cpp @@ -50,7 +50,7 @@ static eye_fn_ptr_t eye_dispatch_vector[td_ns::num_types]; std::pair usm_ndarray_eye(py::ssize_t k, - dpctl::tensor::usm_ndarray dst, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends) { diff --git a/dpctl/tensor/libtensor/source/eye_ctor.hpp b/dpctl/tensor/libtensor/source/eye_ctor.hpp index 3436c23bd8..bb02ea1075 100644 --- a/dpctl/tensor/libtensor/source/eye_ctor.hpp +++ b/dpctl/tensor/libtensor/source/eye_ctor.hpp @@ -39,7 +39,7 @@ namespace py_internal extern std::pair usm_ndarray_eye(py::ssize_t k, - dpctl::tensor::usm_ndarray dst, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends = {}); diff --git a/dpctl/tensor/libtensor/source/full_ctor.cpp b/dpctl/tensor/libtensor/source/full_ctor.cpp index 0a6ec1923d..cbf1736701 100644 --- a/dpctl/tensor/libtensor/source/full_ctor.cpp +++ b/dpctl/tensor/libtensor/source/full_ctor.cpp @@ -53,8 +53,8 @@ using dpctl::tensor::kernels::constructors::full_contig_fn_ptr_t; static full_contig_fn_ptr_t full_contig_dispatch_vector[td_ns::num_types]; std::pair -usm_ndarray_full(py::object py_value, - dpctl::tensor::usm_ndarray dst, +usm_ndarray_full(const py::object &py_value, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends) { diff --git a/dpctl/tensor/libtensor/source/full_ctor.hpp b/dpctl/tensor/libtensor/source/full_ctor.hpp index 3870573fa4..dd45ee9b60 100644 --- a/dpctl/tensor/libtensor/source/full_ctor.hpp +++ b/dpctl/tensor/libtensor/source/full_ctor.hpp @@ -38,8 +38,8 @@ namespace py_internal { extern std::pair -usm_ndarray_full(py::object py_value, - dpctl::tensor::usm_ndarray dst, +usm_ndarray_full(const py::object &py_value, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends = {}); diff --git a/dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp b/dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp index 7f540911a6..a817ac14d7 100644 --- a/dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp +++ b/dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp @@ -200,7 +200,7 @@ _populate_kernel_params(sycl::queue exec_q, /* Utility to parse python object py_ind into vector of `usm_ndarray`s */ std::vector parse_py_ind(const sycl::queue &q, - py::object py_ind) + const py::object &py_ind) { size_t ind_count = py::len(py_ind); std::vector res; @@ -233,9 +233,9 @@ std::vector parse_py_ind(const sycl::queue &q, } std::pair -usm_ndarray_take(dpctl::tensor::usm_ndarray src, - py::object py_ind, - dpctl::tensor::usm_ndarray dst, +usm_ndarray_take(const dpctl::tensor::usm_ndarray &src, + const py::object &py_ind, + const dpctl::tensor::usm_ndarray &dst, int axis_start, uint8_t mode, sycl::queue exec_q, @@ -544,9 +544,9 @@ usm_ndarray_take(dpctl::tensor::usm_ndarray src, } std::pair -usm_ndarray_put(dpctl::tensor::usm_ndarray dst, - py::object py_ind, - dpctl::tensor::usm_ndarray val, +usm_ndarray_put(const dpctl::tensor::usm_ndarray &dst, + const py::object &py_ind, + const dpctl::tensor::usm_ndarray &val, int axis_start, uint8_t mode, sycl::queue exec_q, diff --git a/dpctl/tensor/libtensor/source/integer_advanced_indexing.hpp b/dpctl/tensor/libtensor/source/integer_advanced_indexing.hpp index 
526bbd8020..f2ce1b1d14 100644 --- a/dpctl/tensor/libtensor/source/integer_advanced_indexing.hpp +++ b/dpctl/tensor/libtensor/source/integer_advanced_indexing.hpp @@ -39,18 +39,18 @@ namespace py_internal { extern std::pair -usm_ndarray_take(dpctl::tensor::usm_ndarray, - py::object, - dpctl::tensor::usm_ndarray, +usm_ndarray_take(const dpctl::tensor::usm_ndarray &, + const py::object &, + const dpctl::tensor::usm_ndarray &, int, uint8_t, sycl::queue, const std::vector & = {}); extern std::pair -usm_ndarray_put(dpctl::tensor::usm_ndarray, - py::object, - dpctl::tensor::usm_ndarray, +usm_ndarray_put(const dpctl::tensor::usm_ndarray &, + const py::object &, + const dpctl::tensor::usm_ndarray &, int, uint8_t, sycl::queue, diff --git a/dpctl/tensor/libtensor/source/linear_sequences.cpp b/dpctl/tensor/libtensor/source/linear_sequences.cpp index 306add5f54..f933161900 100644 --- a/dpctl/tensor/libtensor/source/linear_sequences.cpp +++ b/dpctl/tensor/libtensor/source/linear_sequences.cpp @@ -58,9 +58,9 @@ static lin_space_affine_fn_ptr_t lin_space_affine_dispatch_vector[td_ns::num_types]; std::pair -usm_ndarray_linear_sequence_step(py::object start, - py::object dt, - dpctl::tensor::usm_ndarray dst, +usm_ndarray_linear_sequence_step(const py::object &start, + const py::object &dt, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends) { @@ -105,9 +105,9 @@ usm_ndarray_linear_sequence_step(py::object start, } std::pair -usm_ndarray_linear_sequence_affine(py::object start, - py::object end, - dpctl::tensor::usm_ndarray dst, +usm_ndarray_linear_sequence_affine(const py::object &start, + const py::object &end, + const dpctl::tensor::usm_ndarray &dst, bool include_endpoint, sycl::queue exec_q, const std::vector &depends) diff --git a/dpctl/tensor/libtensor/source/linear_sequences.hpp b/dpctl/tensor/libtensor/source/linear_sequences.hpp index 8da56ecd10..f51fe266b1 100644 --- a/dpctl/tensor/libtensor/source/linear_sequences.hpp +++ b/dpctl/tensor/libtensor/source/linear_sequences.hpp @@ -38,16 +38,16 @@ namespace py_internal { extern std::pair -usm_ndarray_linear_sequence_step(py::object start, - py::object dt, - dpctl::tensor::usm_ndarray dst, +usm_ndarray_linear_sequence_step(const py::object &start, + const py::object &dt, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends = {}); extern std::pair usm_ndarray_linear_sequence_affine( - py::object start, - py::object end, - dpctl::tensor::usm_ndarray dst, + const py::object &start, + const py::object &end, + const dpctl::tensor::usm_ndarray &dst, bool include_endpoint, sycl::queue exec_q, const std::vector &depends = {}); diff --git a/dpctl/tensor/libtensor/source/repeat.cpp b/dpctl/tensor/libtensor/source/repeat.cpp index 4d6ef27a81..f9f7c599e1 100644 --- a/dpctl/tensor/libtensor/source/repeat.cpp +++ b/dpctl/tensor/libtensor/source/repeat.cpp @@ -90,13 +90,13 @@ void init_repeat_dispatch_vectors(void) } std::pair -py_repeat_by_sequence(dpctl::tensor::usm_ndarray src, - dpctl::tensor::usm_ndarray dst, - dpctl::tensor::usm_ndarray reps, - dpctl::tensor::usm_ndarray cumsum, +py_repeat_by_sequence(const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, + const dpctl::tensor::usm_ndarray &reps, + const dpctl::tensor::usm_ndarray &cumsum, int axis, sycl::queue exec_q, - std::vector const &depends) + const std::vector &depends) { int src_nd = src.get_ndim(); if (axis < 0 || (axis + 1 > src_nd && src_nd > 0) || @@ -344,12 +344,12 @@ 
py_repeat_by_sequence(dpctl::tensor::usm_ndarray src, } std::pair -py_repeat_by_scalar(dpctl::tensor::usm_ndarray src, - dpctl::tensor::usm_ndarray dst, +py_repeat_by_scalar(const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, const py::ssize_t reps, int axis, sycl::queue exec_q, - std::vector const &depends) + const std::vector &depends) { int src_nd = src.get_ndim(); if (axis < 0 || (axis + 1 > src_nd && src_nd > 0) || diff --git a/dpctl/tensor/libtensor/source/repeat.hpp b/dpctl/tensor/libtensor/source/repeat.hpp index e7ec59f209..7d3e8da2d9 100644 --- a/dpctl/tensor/libtensor/source/repeat.hpp +++ b/dpctl/tensor/libtensor/source/repeat.hpp @@ -40,21 +40,21 @@ namespace py_internal extern void init_repeat_dispatch_vectors(void); extern std::pair -py_repeat_by_sequence(dpctl::tensor::usm_ndarray src, - dpctl::tensor::usm_ndarray dst, - dpctl::tensor::usm_ndarray reps, - dpctl::tensor::usm_ndarray cumsum, +py_repeat_by_sequence(const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, + const dpctl::tensor::usm_ndarray &reps, + const dpctl::tensor::usm_ndarray &cumsum, int axis, sycl::queue exec_q, - std::vector const &depends); + const std::vector &depends); extern std::pair -py_repeat_by_scalar(dpctl::tensor::usm_ndarray src, - dpctl::tensor::usm_ndarray dst, +py_repeat_by_scalar(const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, const py::ssize_t reps, int axis, sycl::queue exec_q, - std::vector const &depends); + const std::vector &depends); } // namespace py_internal } // namespace tensor diff --git a/dpctl/tensor/libtensor/source/sum_reductions.cpp b/dpctl/tensor/libtensor/source/sum_reductions.cpp index 13ab268b55..d82b6f256a 100644 --- a/dpctl/tensor/libtensor/source/sum_reductions.cpp +++ b/dpctl/tensor/libtensor/source/sum_reductions.cpp @@ -95,9 +95,9 @@ static sum_reduction_contig_impl_fn_ptr [td_ns::num_types]; std::pair py_sum_over_axis( - dpctl::tensor::usm_ndarray src, + const dpctl::tensor::usm_ndarray &src, int trailing_dims_to_reduce, // sum over this many trailing indexes - dpctl::tensor::usm_ndarray dst, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends) { @@ -419,8 +419,8 @@ std::pair py_sum_over_axis( return std::make_pair(keep_args_event, comp_ev); } -bool py_sum_over_axis_dtype_supported(py::dtype input_dtype, - py::dtype output_dtype, +bool py_sum_over_axis_dtype_supported(const py::dtype &input_dtype, + const py::dtype &output_dtype, const std::string &dst_usm_type, sycl::queue q) { diff --git a/dpctl/tensor/libtensor/source/triul_ctor.cpp b/dpctl/tensor/libtensor/source/triul_ctor.cpp index b40b50d030..74c226823c 100644 --- a/dpctl/tensor/libtensor/source/triul_ctor.cpp +++ b/dpctl/tensor/libtensor/source/triul_ctor.cpp @@ -53,8 +53,8 @@ static tri_fn_ptr_t triu_generic_dispatch_vector[td_ns::num_types]; std::pair usm_ndarray_triul(sycl::queue exec_q, - dpctl::tensor::usm_ndarray src, - dpctl::tensor::usm_ndarray dst, + const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, char part, py::ssize_t k = 0, const std::vector &depends = {}) diff --git a/dpctl/tensor/libtensor/source/triul_ctor.hpp b/dpctl/tensor/libtensor/source/triul_ctor.hpp index 2f277bb416..a2e6434dc8 100644 --- a/dpctl/tensor/libtensor/source/triul_ctor.hpp +++ b/dpctl/tensor/libtensor/source/triul_ctor.hpp @@ -39,8 +39,8 @@ namespace py_internal extern std::pair usm_ndarray_triul(sycl::queue exec_q, - dpctl::tensor::usm_ndarray src, - 
dpctl::tensor::usm_ndarray dst, + const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, char part, py::ssize_t k = 0, const std::vector &depends = {}); diff --git a/dpctl/tensor/libtensor/source/where.cpp b/dpctl/tensor/libtensor/source/where.cpp index 06c6ca4a25..385f9b9428 100644 --- a/dpctl/tensor/libtensor/source/where.cpp +++ b/dpctl/tensor/libtensor/source/where.cpp @@ -59,10 +59,10 @@ static where_strided_impl_fn_ptr_t using dpctl::utils::keep_args_alive; std::pair -py_where(dpctl::tensor::usm_ndarray condition, - dpctl::tensor::usm_ndarray x1, - dpctl::tensor::usm_ndarray x2, - dpctl::tensor::usm_ndarray dst, +py_where(const dpctl::tensor::usm_ndarray &condition, + const dpctl::tensor::usm_ndarray &x1, + const dpctl::tensor::usm_ndarray &x2, + const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, const std::vector &depends) { diff --git a/dpctl/tensor/libtensor/source/where.hpp b/dpctl/tensor/libtensor/source/where.hpp index 38d40b8550..4a0fe7fa6d 100644 --- a/dpctl/tensor/libtensor/source/where.hpp +++ b/dpctl/tensor/libtensor/source/where.hpp @@ -38,10 +38,10 @@ namespace py_internal { extern std::pair -py_where(dpctl::tensor::usm_ndarray, - dpctl::tensor::usm_ndarray, - dpctl::tensor::usm_ndarray, - dpctl::tensor::usm_ndarray, +py_where(const dpctl::tensor::usm_ndarray &, + const dpctl::tensor::usm_ndarray &, + const dpctl::tensor::usm_ndarray &, + const dpctl::tensor::usm_ndarray &, sycl::queue, const std::vector &);

From a7d47371e7594af582874bae9658c9cc98b37e0e Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk
Date: Mon, 25 Sep 2023 17:18:52 -0500
Subject: [PATCH 2/9] Pass sycl::queue by reference instead of by value, per
 Coverity issues

`sycl::queue` stores a shared pointer, and copying it involves the use of
atomics. Passing the queue by reference ensures no copy is made.
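For context, the difference can be seen in a minimal stand-alone sketch (illustration only, not part of this patch; the names `use_queue_by_value` and `use_queue_by_ref` are hypothetical): a by-value `sycl::queue` parameter copies the handle, and because the handle owns a shared pointer to the queue implementation, each call pays an atomic reference-count increment and decrement, while a reference parameter uses the caller's handle directly.

    // Hypothetical illustration of by-value vs. by-reference queue passing;
    // not dpctl code.
    #include <sycl/sycl.hpp>

    // By value: copies the queue handle, i.e. copies its internal shared
    // pointer and bumps the atomic reference count on entry and exit.
    void use_queue_by_value(sycl::queue q) { q.wait(); }

    // By reference: uses the caller's handle directly; no copy, no atomics.
    void use_queue_by_ref(sycl::queue &q) { q.wait(); }

    int main()
    {
        sycl::queue q;
        use_queue_by_value(q); // extra shared-pointer copy per call
        use_queue_by_ref(q);   // no copy
        return 0;
    }

The hunks below apply the same idea to the kernel entry points, whose signatures now take `sycl::queue &` instead of `sycl::queue`.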
--- .../include/kernels/accumulators.hpp | 10 +++--- .../kernels/boolean_advanced_indexing.hpp | 20 +++++------ .../include/kernels/boolean_reductions.hpp | 10 +++--- .../include/kernels/constructors.hpp | 26 +++++++------- .../include/kernels/copy_and_cast.hpp | 34 +++++++++---------- .../kernels/elementwise_functions/abs.hpp | 4 +-- .../kernels/elementwise_functions/acos.hpp | 4 +-- .../kernels/elementwise_functions/acosh.hpp | 4 +-- .../kernels/elementwise_functions/add.hpp | 14 ++++---- .../kernels/elementwise_functions/asin.hpp | 4 +-- .../kernels/elementwise_functions/asinh.hpp | 4 +-- .../kernels/elementwise_functions/atan.hpp | 4 +-- .../kernels/elementwise_functions/atan2.hpp | 4 +-- .../kernels/elementwise_functions/atanh.hpp | 4 +-- .../elementwise_functions/bitwise_and.hpp | 4 +-- .../elementwise_functions/bitwise_invert.hpp | 4 +-- .../bitwise_left_shift.hpp | 4 +-- .../elementwise_functions/bitwise_or.hpp | 4 +-- .../bitwise_right_shift.hpp | 4 +-- .../elementwise_functions/bitwise_xor.hpp | 4 +-- .../kernels/elementwise_functions/ceil.hpp | 4 +-- .../kernels/elementwise_functions/common.hpp | 27 ++++++++------- .../elementwise_functions/common_inplace.hpp | 14 ++++---- .../kernels/elementwise_functions/conj.hpp | 4 +-- .../kernels/elementwise_functions/cos.hpp | 4 +-- .../kernels/elementwise_functions/cosh.hpp | 4 +-- .../kernels/elementwise_functions/equal.hpp | 4 +-- .../kernels/elementwise_functions/exp.hpp | 4 +-- .../kernels/elementwise_functions/expm1.hpp | 23 ++----------- .../kernels/elementwise_functions/floor.hpp | 4 +-- .../elementwise_functions/floor_divide.hpp | 4 +-- .../kernels/elementwise_functions/greater.hpp | 4 +-- .../elementwise_functions/greater_equal.hpp | 4 +-- .../kernels/elementwise_functions/hypot.hpp | 4 +-- .../kernels/elementwise_functions/imag.hpp | 4 +-- .../elementwise_functions/isfinite.hpp | 4 +-- .../kernels/elementwise_functions/isinf.hpp | 4 +-- .../kernels/elementwise_functions/isnan.hpp | 4 +-- .../kernels/elementwise_functions/less.hpp | 4 +-- .../elementwise_functions/less_equal.hpp | 4 +-- .../kernels/elementwise_functions/log.hpp | 23 ++----------- .../kernels/elementwise_functions/log10.hpp | 23 ++----------- .../kernels/elementwise_functions/log1p.hpp | 23 ++----------- .../kernels/elementwise_functions/log2.hpp | 23 ++----------- .../elementwise_functions/logaddexp.hpp | 4 +-- .../elementwise_functions/logical_and.hpp | 4 +-- .../elementwise_functions/logical_not.hpp | 4 +-- .../elementwise_functions/logical_or.hpp | 4 +-- .../elementwise_functions/logical_xor.hpp | 4 +-- .../kernels/elementwise_functions/maximum.hpp | 4 +-- .../kernels/elementwise_functions/minimum.hpp | 4 +-- .../elementwise_functions/multiply.hpp | 14 ++++---- .../elementwise_functions/negative.hpp | 23 ++----------- .../elementwise_functions/not_equal.hpp | 4 +-- .../elementwise_functions/positive.hpp | 23 ++----------- .../kernels/elementwise_functions/pow.hpp | 4 +-- .../kernels/elementwise_functions/proj.hpp | 6 ++-- .../kernels/elementwise_functions/real.hpp | 4 +-- .../elementwise_functions/remainder.hpp | 4 +-- .../kernels/elementwise_functions/round.hpp | 4 +-- .../kernels/elementwise_functions/sign.hpp | 23 ++----------- .../kernels/elementwise_functions/signbit.hpp | 4 +-- .../kernels/elementwise_functions/sin.hpp | 4 +-- .../kernels/elementwise_functions/sinh.hpp | 4 +-- .../kernels/elementwise_functions/sqrt.hpp | 4 +-- .../kernels/elementwise_functions/square.hpp | 4 +-- .../elementwise_functions/subtract.hpp | 14 ++++---- 
.../kernels/elementwise_functions/tan.hpp | 4 +-- .../kernels/elementwise_functions/tanh.hpp | 4 +-- .../elementwise_functions/true_divide.hpp | 8 ++--- .../kernels/elementwise_functions/trunc.hpp | 4 +-- .../kernels/integer_advanced_indexing.hpp | 8 ++--- .../libtensor/include/kernels/reductions.hpp | 12 +++---- .../libtensor/include/kernels/repeat.hpp | 16 ++++----- .../libtensor/include/kernels/where.hpp | 8 ++--- 75 files changed, 239 insertions(+), 390 deletions(-) diff --git a/dpctl/tensor/libtensor/include/kernels/accumulators.hpp b/dpctl/tensor/libtensor/include/kernels/accumulators.hpp index 0072690e9b..d153a3b332 100644 --- a/dpctl/tensor/libtensor/include/kernels/accumulators.hpp +++ b/dpctl/tensor/libtensor/include/kernels/accumulators.hpp @@ -103,7 +103,7 @@ template -sycl::event inclusive_scan_rec(sycl::queue exec_q, +sycl::event inclusive_scan_rec(sycl::queue &exec_q, size_t n_elems, size_t wg_size, const inputT *input, @@ -214,14 +214,14 @@ sycl::event inclusive_scan_rec(sycl::queue exec_q, } typedef size_t (*accumulate_contig_impl_fn_ptr_t)( - sycl::queue, + sycl::queue &, size_t, const char *, char *, std::vector const &); template -size_t accumulate_contig_impl(sycl::queue q, +size_t accumulate_contig_impl(sycl::queue &q, size_t n_elems, const char *mask, char *cumsum, @@ -296,7 +296,7 @@ template struct Cumsum1DContigFactory }; typedef size_t (*accumulate_strided_impl_fn_ptr_t)( - sycl::queue, + sycl::queue &, size_t, const char *, int, @@ -305,7 +305,7 @@ typedef size_t (*accumulate_strided_impl_fn_ptr_t)( std::vector const &); template -size_t accumulate_strided_impl(sycl::queue q, +size_t accumulate_strided_impl(sycl::queue &q, size_t n_elems, const char *mask, int nd, diff --git a/dpctl/tensor/libtensor/include/kernels/boolean_advanced_indexing.hpp b/dpctl/tensor/libtensor/include/kernels/boolean_advanced_indexing.hpp index 595fc68496..968459fb68 100644 --- a/dpctl/tensor/libtensor/include/kernels/boolean_advanced_indexing.hpp +++ b/dpctl/tensor/libtensor/include/kernels/boolean_advanced_indexing.hpp @@ -198,7 +198,7 @@ template sycl::event masked_extract_all_slices_strided_impl( - sycl::queue exec_q, + sycl::queue &exec_q, py::ssize_t iteration_size, const char *src_p, const char *cumsum_p, @@ -253,7 +253,7 @@ sycl::event masked_extract_all_slices_strided_impl( } typedef sycl::event (*masked_extract_some_slices_strided_impl_fn_ptr_t)( - sycl::queue, + sycl::queue &, py::ssize_t, py::ssize_t, const char *, @@ -278,7 +278,7 @@ class masked_extract_some_slices_strided_impl_krn; template sycl::event masked_extract_some_slices_strided_impl( - sycl::queue exec_q, + sycl::queue &exec_q, py::ssize_t orthog_nelems, py::ssize_t masked_nelems, const char *src_p, @@ -380,7 +380,7 @@ template sycl::event masked_place_all_slices_strided_impl( - sycl::queue exec_q, + sycl::queue &exec_q, py::ssize_t iteration_size, char *dst_p, const char *cumsum_p, @@ -430,7 +430,7 @@ sycl::event masked_place_all_slices_strided_impl( } typedef sycl::event (*masked_place_some_slices_strided_impl_fn_ptr_t)( - sycl::queue, + sycl::queue &, py::ssize_t, py::ssize_t, char *, @@ -455,7 +455,7 @@ class masked_place_some_slices_strided_impl_krn; template sycl::event masked_place_some_slices_strided_impl( - sycl::queue exec_q, + sycl::queue &exec_q, py::ssize_t orthog_nelems, py::ssize_t masked_nelems, char *dst_p, @@ -549,7 +549,7 @@ struct MaskPlaceSomeSlicesStridedFactoryForInt64 template class non_zero_indexes_krn; typedef sycl::event (*non_zero_indexes_fn_ptr_t)( - sycl::queue, + sycl::queue &, 
py::ssize_t, py::ssize_t, int, @@ -559,7 +559,7 @@ typedef sycl::event (*non_zero_indexes_fn_ptr_t)( std::vector const &); template -sycl::event non_zero_indexes_impl(sycl::queue exec_q, +sycl::event non_zero_indexes_impl(sycl::queue &exec_q, py::ssize_t iter_size, py::ssize_t nz_elems, int nd, diff --git a/dpctl/tensor/libtensor/include/kernels/boolean_reductions.hpp b/dpctl/tensor/libtensor/include/kernels/boolean_reductions.hpp index 9b54e505f6..9736b2c2a3 100644 --- a/dpctl/tensor/libtensor/include/kernels/boolean_reductions.hpp +++ b/dpctl/tensor/libtensor/include/kernels/boolean_reductions.hpp @@ -244,7 +244,7 @@ struct ContigBooleanReduction }; typedef sycl::event (*boolean_reduction_contig_impl_fn_ptr)( - sycl::queue, + sycl::queue &, size_t, size_t, const char *, @@ -264,7 +264,7 @@ using dpctl::tensor::sycl_utils::choose_workgroup_size; template sycl::event -boolean_reduction_axis1_contig_impl(sycl::queue exec_q, +boolean_reduction_axis1_contig_impl(sycl::queue &exec_q, size_t iter_nelems, size_t reduction_nelems, const char *arg_cp, @@ -463,7 +463,7 @@ class boolean_reduction_axis0_contig_krn; template sycl::event -boolean_reduction_axis0_contig_impl(sycl::queue exec_q, +boolean_reduction_axis0_contig_impl(sycl::queue &exec_q, size_t iter_nelems, size_t reduction_nelems, const char *arg_cp, @@ -572,7 +572,7 @@ template class boolean_reduction_seq_strided_krn; typedef sycl::event (*boolean_reduction_strided_impl_fn_ptr)( - sycl::queue, + sycl::queue &, size_t, size_t, const char *, @@ -588,7 +588,7 @@ typedef sycl::event (*boolean_reduction_strided_impl_fn_ptr)( template sycl::event -boolean_reduction_strided_impl(sycl::queue exec_q, +boolean_reduction_strided_impl(sycl::queue &exec_q, size_t iter_nelems, size_t reduction_nelems, const char *arg_cp, diff --git a/dpctl/tensor/libtensor/include/kernels/constructors.hpp b/dpctl/tensor/libtensor/include/kernels/constructors.hpp index e9745b2635..8870e26ac2 100644 --- a/dpctl/tensor/libtensor/include/kernels/constructors.hpp +++ b/dpctl/tensor/libtensor/include/kernels/constructors.hpp @@ -72,7 +72,7 @@ template <> inline sycl::half unbox_py_scalar(const py::object &o) // start and step data typedef sycl::event (*lin_space_step_fn_ptr_t)( - sycl::queue, + sycl::queue &, size_t, // num_elements const py::object &start, const py::object &step, @@ -124,7 +124,7 @@ template class LinearSequenceStepFunctor * @defgroup CtorKernels */ template -sycl::event lin_space_step_impl(sycl::queue exec_q, +sycl::event lin_space_step_impl(sycl::queue &exec_q, size_t nelems, Ty start_v, Ty step_v, @@ -162,7 +162,7 @@ sycl::event lin_space_step_impl(sycl::queue exec_q, * @defgroup CtorKernels */ template -sycl::event lin_space_step_impl(sycl::queue exec_q, +sycl::event lin_space_step_impl(sycl::queue &exec_q, size_t nelems, const py::object &start, const py::object &step, @@ -202,7 +202,7 @@ template struct LinSpaceStepFactory // start and and data typedef sycl::event (*lin_space_affine_fn_ptr_t)( - sycl::queue, + sycl::queue &, size_t, // num_elements const py::object &start, const py::object &end, @@ -280,7 +280,7 @@ template class LinearSequenceAffineFunctor * @defgroup CtorKernels */ template -sycl::event lin_space_affine_impl(sycl::queue exec_q, +sycl::event lin_space_affine_impl(sycl::queue &exec_q, size_t nelems, Ty start_v, Ty end_v, @@ -333,7 +333,7 @@ sycl::event lin_space_affine_impl(sycl::queue exec_q, * @defgroup CtorKernels */ template -sycl::event lin_space_affine_impl(sycl::queue exec_q, +sycl::event lin_space_affine_impl(sycl::queue 
&exec_q, size_t nelems, const py::object &start, const py::object &end, @@ -370,7 +370,7 @@ template struct LinSpaceAffineFactory /* ================ Full ================== */ -typedef sycl::event (*full_contig_fn_ptr_t)(sycl::queue, +typedef sycl::event (*full_contig_fn_ptr_t)(sycl::queue &, size_t, const py::object &, char *, @@ -392,7 +392,7 @@ typedef sycl::event (*full_contig_fn_ptr_t)(sycl::queue, * @defgroup CtorKernels */ template -sycl::event full_contig_impl(sycl::queue q, +sycl::event full_contig_impl(sycl::queue &q, size_t nelems, dstTy fill_v, char *dst_p, @@ -425,7 +425,7 @@ sycl::event full_contig_impl(sycl::queue q, * @defgroup CtorKernels */ template -sycl::event full_contig_impl(sycl::queue exec_q, +sycl::event full_contig_impl(sycl::queue &exec_q, size_t nelems, const py::object &py_value, char *dst_p, @@ -455,7 +455,7 @@ template struct FullContigFactory /* ================ Eye ================== */ -typedef sycl::event (*eye_fn_ptr_t)(sycl::queue, +typedef sycl::event (*eye_fn_ptr_t)(sycl::queue &, size_t nelems, // num_elements py::ssize_t start, py::ssize_t end, @@ -509,7 +509,7 @@ template class EyeFunctor * @defgroup CtorKernels */ template -sycl::event eye_impl(sycl::queue exec_q, +sycl::event eye_impl(sycl::queue &exec_q, size_t nelems, const py::ssize_t start, const py::ssize_t end, @@ -544,7 +544,7 @@ template struct EyeFactory /* =========================== Tril and triu ============================== */ // define function type -typedef sycl::event (*tri_fn_ptr_t)(sycl::queue, +typedef sycl::event (*tri_fn_ptr_t)(sycl::queue &, py::ssize_t, // inner_range //py::ssize_t py::ssize_t, // outer_range char *, // src_data_ptr @@ -579,7 +579,7 @@ typedef sycl::event (*tri_fn_ptr_t)(sycl::queue, */ template class tri_kernel; template -sycl::event tri_impl(sycl::queue exec_q, +sycl::event tri_impl(sycl::queue &exec_q, py::ssize_t inner_range, py::ssize_t outer_range, char *src_p, diff --git a/dpctl/tensor/libtensor/include/kernels/copy_and_cast.hpp b/dpctl/tensor/libtensor/include/kernels/copy_and_cast.hpp index e5aaa34903..0c8f4a64f7 100644 --- a/dpctl/tensor/libtensor/include/kernels/copy_and_cast.hpp +++ b/dpctl/tensor/libtensor/include/kernels/copy_and_cast.hpp @@ -100,7 +100,7 @@ class GenericCopyFunctor * @brief Function pointer type for generic array cast and copying function. */ typedef sycl::event (*copy_and_cast_generic_fn_ptr_t)( - sycl::queue, + sycl::queue &, size_t, int, const py::ssize_t *, @@ -146,7 +146,7 @@ typedef sycl::event (*copy_and_cast_generic_fn_ptr_t)( */ template sycl::event -copy_and_cast_generic_impl(sycl::queue q, +copy_and_cast_generic_impl(sycl::queue &q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, @@ -277,7 +277,7 @@ class ContigCopyFunctor * @brief Function pointer type for contiguous array cast and copy function. 
*/ typedef sycl::event (*copy_and_cast_contig_fn_ptr_t)( - sycl::queue, + sycl::queue &, size_t, const char *, char *, @@ -303,7 +303,7 @@ typedef sycl::event (*copy_and_cast_contig_fn_ptr_t)( @ingroup CopyAndCastKernels */ template -sycl::event copy_and_cast_contig_impl(sycl::queue q, +sycl::event copy_and_cast_contig_impl(sycl::queue &q, size_t nelems, const char *src_cp, char *dst_cp, @@ -356,7 +356,7 @@ template struct CopyAndCastContigFactory * @ingroup CopyAndCastKernels */ typedef sycl::event (*copy_and_cast_1d_fn_ptr_t)( - sycl::queue, + sycl::queue &, size_t, const std::array, const std::array, @@ -372,7 +372,7 @@ typedef sycl::event (*copy_and_cast_1d_fn_ptr_t)( * @ingroup CopyAndCastKernels */ typedef sycl::event (*copy_and_cast_2d_fn_ptr_t)( - sycl::queue, + sycl::queue &, size_t, const std::array, const std::array, @@ -414,7 +414,7 @@ typedef sycl::event (*copy_and_cast_2d_fn_ptr_t)( */ template sycl::event -copy_and_cast_nd_specialized_impl(sycl::queue q, +copy_and_cast_nd_specialized_impl(sycl::queue &q, size_t nelems, const std::array shape, const std::array src_strides, @@ -507,7 +507,7 @@ class GenericCopyFromHostFunctor }; typedef void (*copy_and_cast_from_host_blocking_fn_ptr_t)( - sycl::queue, + sycl::queue &, size_t, int, py::ssize_t *, @@ -560,7 +560,7 @@ typedef void (*copy_and_cast_from_host_blocking_fn_ptr_t)( */ template void copy_and_cast_from_host_impl( - sycl::queue q, + sycl::queue &q, size_t nelems, int nd, py::ssize_t *shape_and_strides, @@ -661,7 +661,7 @@ class GenericCopyForReshapeFunctor // define function type typedef sycl::event (*copy_for_reshape_fn_ptr_t)( - sycl::queue, + sycl::queue &, size_t, // num_elements int, // src_nd int, // dst_nd @@ -693,7 +693,7 @@ typedef sycl::event (*copy_for_reshape_fn_ptr_t)( */ template sycl::event -copy_for_reshape_generic_impl(sycl::queue q, +copy_for_reshape_generic_impl(sycl::queue &q, size_t nelems, int src_nd, int dst_nd, @@ -862,7 +862,7 @@ class StridedCopyForRollFunctor // define function type typedef sycl::event (*copy_for_roll_strided_fn_ptr_t)( - sycl::queue, + sycl::queue &, size_t, // shift size_t, // num_elements int, // common_nd @@ -899,7 +899,7 @@ typedef sycl::event (*copy_for_roll_strided_fn_ptr_t)( */ template sycl::event -copy_for_roll_strided_impl(sycl::queue q, +copy_for_roll_strided_impl(sycl::queue &q, size_t shift, size_t nelems, int nd, @@ -950,7 +950,7 @@ copy_for_roll_strided_impl(sycl::queue q, // define function type typedef sycl::event (*copy_for_roll_contig_fn_ptr_t)( - sycl::queue, + sycl::queue &, size_t, // shift size_t, // num_elements const char *, // src_data_ptr @@ -983,7 +983,7 @@ template class copy_for_roll_contig_kernel; * @ingroup CopyAndCastKernels */ template -sycl::event copy_for_roll_contig_impl(sycl::queue q, +sycl::event copy_for_roll_contig_impl(sycl::queue &q, size_t shift, size_t nelems, const char *src_p, @@ -1053,7 +1053,7 @@ class copy_for_roll_ndshift_strided_kernel; // define function type typedef sycl::event (*copy_for_roll_ndshift_strided_fn_ptr_t)( - sycl::queue, + sycl::queue &, size_t, // num_elements int, // common_nd const py::ssize_t *, // packed shape, strides, shifts @@ -1065,7 +1065,7 @@ typedef sycl::event (*copy_for_roll_ndshift_strided_fn_ptr_t)( template sycl::event copy_for_roll_ndshift_strided_impl( - sycl::queue q, + sycl::queue &q, size_t nelems, int nd, const py::ssize_t *packed_shapes_and_strides_and_shifts, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/abs.hpp 
b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/abs.hpp index f96b31c137..bcf6a28040 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/abs.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/abs.hpp @@ -160,7 +160,7 @@ template class abs_contig_kernel; template -sycl::event abs_contig_impl(sycl::queue exec_q, +sycl::event abs_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, @@ -204,7 +204,7 @@ using AbsStridedFunctor = elementwise_common:: template class abs_strided_kernel; template -sycl::event abs_strided_impl(sycl::queue exec_q, +sycl::event abs_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acos.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acos.hpp index c091005f42..ac1d597c93 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acos.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acos.hpp @@ -151,7 +151,7 @@ template class acos_contig_kernel; template -sycl::event acos_contig_impl(sycl::queue exec_q, +sycl::event acos_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, @@ -192,7 +192,7 @@ template class acos_strided_kernel; template sycl::event -acos_strided_impl(sycl::queue exec_q, +acos_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acosh.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acosh.hpp index 9601f6b3a7..484b0da8a6 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acosh.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acosh.hpp @@ -181,7 +181,7 @@ template class acosh_contig_kernel; template -sycl::event acosh_contig_impl(sycl::queue exec_q, +sycl::event acosh_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, @@ -222,7 +222,7 @@ template class acosh_strided_kernel; template sycl::event -acosh_strided_impl(sycl::queue exec_q, +acosh_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/add.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/add.hpp index 18e946a7d0..df6797845f 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/add.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/add.hpp @@ -176,7 +176,7 @@ template -sycl::event add_contig_impl(sycl::queue exec_q, +sycl::event add_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg1_p, py::ssize_t arg1_offset, @@ -222,7 +222,7 @@ template class add_strided_kernel; template -sycl::event add_strided_impl(sycl::queue exec_q, +sycl::event add_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, @@ -270,7 +270,7 @@ using AddContigMatrixContigRowBroadcastingFunctor = template sycl::event add_contig_matrix_contig_row_broadcast_impl( - sycl::queue exec_q, + sycl::queue &exec_q, std::vector &host_tasks, size_t n0, size_t n1, @@ -319,7 +319,7 @@ struct AddContigMatrixContigRowBroadcastFactory template sycl::event add_contig_row_contig_matrix_broadcast_impl( - sycl::queue exec_q, + sycl::queue &exec_q, std::vector &host_tasks, size_t n0, size_t n1, @@ -412,7 +412,7 @@ class add_inplace_contig_kernel; 
template sycl::event -add_inplace_contig_impl(sycl::queue exec_q, +add_inplace_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, py::ssize_t arg_offset, @@ -446,7 +446,7 @@ class add_inplace_strided_kernel; template sycl::event -add_inplace_strided_impl(sycl::queue exec_q, +add_inplace_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, @@ -492,7 +492,7 @@ using AddInplaceRowMatrixBroadcastingFunctor = template sycl::event add_inplace_row_matrix_broadcast_impl( - sycl::queue exec_q, + sycl::queue &exec_q, std::vector &host_tasks, size_t n0, size_t n1, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asin.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asin.hpp index f559171b54..8b960dd30d 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asin.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asin.hpp @@ -171,7 +171,7 @@ template class asin_contig_kernel; template -sycl::event asin_contig_impl(sycl::queue exec_q, +sycl::event asin_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, @@ -212,7 +212,7 @@ template class asin_strided_kernel; template sycl::event -asin_strided_impl(sycl::queue exec_q, +asin_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asinh.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asinh.hpp index 729d364abf..271a861cfe 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asinh.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asinh.hpp @@ -156,7 +156,7 @@ template class asinh_contig_kernel; template -sycl::event asinh_contig_impl(sycl::queue exec_q, +sycl::event asinh_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, @@ -197,7 +197,7 @@ template class asinh_strided_kernel; template sycl::event -asinh_strided_impl(sycl::queue exec_q, +asinh_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atan.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atan.hpp index 8f26d0cb42..f1dcce2831 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atan.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atan.hpp @@ -163,7 +163,7 @@ template class atan_contig_kernel; template -sycl::event atan_contig_impl(sycl::queue exec_q, +sycl::event atan_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, @@ -204,7 +204,7 @@ template class atan_strided_kernel; template sycl::event -atan_strided_impl(sycl::queue exec_q, +atan_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atan2.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atan2.hpp index fdc9c4e66e..765c0fe0c3 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atan2.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atan2.hpp @@ -109,7 +109,7 @@ template -sycl::event atan2_contig_impl(sycl::queue exec_q, +sycl::event atan2_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg1_p, py::ssize_t arg1_offset, @@ -158,7 +158,7 @@ class atan2_strided_kernel; template sycl::event 
-atan2_strided_impl(sycl::queue exec_q, +atan2_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atanh.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atanh.hpp index 6eae85f7b7..56432d7808 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atanh.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atanh.hpp @@ -160,7 +160,7 @@ template class atanh_contig_kernel; template -sycl::event atanh_contig_impl(sycl::queue exec_q, +sycl::event atanh_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, @@ -201,7 +201,7 @@ template class atanh_strided_kernel; template sycl::event -atanh_strided_impl(sycl::queue exec_q, +atanh_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_and.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_and.hpp index 24a547eacf..016b3a05d3 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_and.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_and.hpp @@ -167,7 +167,7 @@ class bitwise_and_contig_kernel; template sycl::event -bitwise_and_contig_impl(sycl::queue exec_q, +bitwise_and_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg1_p, py::ssize_t arg1_offset, @@ -218,7 +218,7 @@ class bitwise_and_strided_kernel; template sycl::event -bitwise_and_strided_impl(sycl::queue exec_q, +bitwise_and_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_invert.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_invert.hpp index f258af34e2..9ce56be966 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_invert.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_invert.hpp @@ -126,7 +126,7 @@ class bitwise_invert_contig_kernel; template sycl::event -bitwise_invert_contig_impl(sycl::queue exec_q, +bitwise_invert_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, @@ -171,7 +171,7 @@ class bitwise_invert_strided_kernel; template sycl::event -bitwise_invert_strided_impl(sycl::queue exec_q, +bitwise_invert_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_left_shift.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_left_shift.hpp index 6e12d80246..4ae04f97de 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_left_shift.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_left_shift.hpp @@ -176,7 +176,7 @@ class bitwise_left_shift_contig_kernel; template sycl::event -bitwise_left_shift_contig_impl(sycl::queue exec_q, +bitwise_left_shift_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg1_p, py::ssize_t arg1_offset, @@ -229,7 +229,7 @@ class bitwise_left_shift_strided_kernel; template sycl::event bitwise_left_shift_strided_impl( - sycl::queue exec_q, + sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_or.hpp 
b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_or.hpp index fd1f3b357c..65f25dd296 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_or.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_or.hpp @@ -165,7 +165,7 @@ template -sycl::event bitwise_or_contig_impl(sycl::queue exec_q, +sycl::event bitwise_or_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg1_p, py::ssize_t arg1_offset, @@ -215,7 +215,7 @@ class bitwise_or_strided_kernel; template sycl::event -bitwise_or_strided_impl(sycl::queue exec_q, +bitwise_or_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_right_shift.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_right_shift.hpp index c2666bb1cd..9442d4f6b7 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_right_shift.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_right_shift.hpp @@ -178,7 +178,7 @@ class bitwise_right_shift_contig_kernel; template sycl::event -bitwise_right_shift_contig_impl(sycl::queue exec_q, +bitwise_right_shift_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg1_p, py::ssize_t arg1_offset, @@ -231,7 +231,7 @@ class bitwise_right_shift_strided_kernel; template sycl::event bitwise_right_shift_strided_impl( - sycl::queue exec_q, + sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_xor.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_xor.hpp index 9569cd53cd..2b0ab09dca 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_xor.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_xor.hpp @@ -167,7 +167,7 @@ class bitwise_xor_contig_kernel; template sycl::event -bitwise_xor_contig_impl(sycl::queue exec_q, +bitwise_xor_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg1_p, py::ssize_t arg1_offset, @@ -218,7 +218,7 @@ class bitwise_xor_strided_kernel; template sycl::event -bitwise_xor_strided_impl(sycl::queue exec_q, +bitwise_xor_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/ceil.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/ceil.hpp index 7cabf9b7e3..76fa80c287 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/ceil.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/ceil.hpp @@ -110,7 +110,7 @@ template class ceil_contig_kernel; template -sycl::event ceil_contig_impl(sycl::queue exec_q, +sycl::event ceil_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, @@ -151,7 +151,7 @@ template class ceil_strided_kernel; template sycl::event -ceil_strided_impl(sycl::queue exec_q, +ceil_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/common.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/common.hpp index 59d13bd054..1bb1b48b2f 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/common.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/common.hpp @@ -27,6 +27,7 @@ 
#include #include #include +#include #include "utils/offset_utils.hpp" @@ -261,7 +262,7 @@ template -sycl::event unary_contig_impl(sycl::queue exec_q, +sycl::event unary_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, @@ -296,7 +297,7 @@ template class kernel_name> sycl::event -unary_strided_impl(sycl::queue exec_q, +unary_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, @@ -624,14 +625,14 @@ struct BinaryContigRowContigMatrixBroadcastingFunctor // Typedefs for function pointers typedef sycl::event (*unary_contig_impl_fn_ptr_t)( - sycl::queue, + sycl::queue &, size_t, const char *, char *, const std::vector &); typedef sycl::event (*unary_strided_impl_fn_ptr_t)( - sycl::queue, + sycl::queue &, size_t, int, const py::ssize_t *, @@ -643,7 +644,7 @@ typedef sycl::event (*unary_strided_impl_fn_ptr_t)( const std::vector &); typedef sycl::event (*binary_contig_impl_fn_ptr_t)( - sycl::queue, + sycl::queue &, size_t, const char *, py::ssize_t, @@ -654,7 +655,7 @@ typedef sycl::event (*binary_contig_impl_fn_ptr_t)( const std::vector &); typedef sycl::event (*binary_strided_impl_fn_ptr_t)( - sycl::queue, + sycl::queue &, size_t, int, const py::ssize_t *, @@ -668,7 +669,7 @@ typedef sycl::event (*binary_strided_impl_fn_ptr_t)( const std::vector &); typedef sycl::event (*binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t)( - sycl::queue, + sycl::queue &, std::vector &, size_t, size_t, @@ -681,7 +682,7 @@ typedef sycl::event (*binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t)( const std::vector &); typedef sycl::event (*binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t)( - sycl::queue, + sycl::queue &, std::vector &, size_t, size_t, @@ -711,7 +712,7 @@ template -sycl::event binary_contig_impl(sycl::queue exec_q, +sycl::event binary_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg1_p, py::ssize_t arg1_offset, @@ -755,7 +756,7 @@ template class kernel_name> sycl::event -binary_strided_impl(sycl::queue exec_q, +binary_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, @@ -799,7 +800,7 @@ template class kernel_name> sycl::event binary_contig_matrix_contig_row_broadcast_impl( - sycl::queue exec_q, + sycl::queue &exec_q, std::vector &host_tasks, size_t n0, size_t n1, @@ -877,7 +878,7 @@ template class kernel_name> sycl::event binary_contig_row_contig_matrix_broadcast_impl( - sycl::queue exec_q, + sycl::queue &exec_q, std::vector &host_tasks, size_t n0, size_t n1, @@ -940,7 +941,7 @@ sycl::event binary_contig_row_contig_matrix_broadcast_impl( sycl::event tmp_cleanup_ev = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(comp_ev); - sycl::context ctx = exec_q.get_context(); + const sycl::context &ctx = exec_q.get_context(); cgh.host_task([ctx, padded_vec]() { sycl::free(padded_vec, ctx); }); }); host_tasks.push_back(tmp_cleanup_ev); diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/common_inplace.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/common_inplace.hpp index 505a40acc5..614c7f4092 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/common_inplace.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/common_inplace.hpp @@ -248,7 +248,7 @@ struct BinaryInplaceRowMatrixBroadcastingFunctor // Typedefs for function pointers typedef sycl::event (*binary_inplace_contig_impl_fn_ptr_t)( - sycl::queue, + sycl::queue &, size_t, const char *, py::ssize_t, @@ -257,7 +257,7 @@ 
typedef sycl::event (*binary_inplace_contig_impl_fn_ptr_t)( const std::vector &); typedef sycl::event (*binary_inplace_strided_impl_fn_ptr_t)( - sycl::queue, + sycl::queue &, size_t, int, const py::ssize_t *, @@ -269,7 +269,7 @@ typedef sycl::event (*binary_inplace_strided_impl_fn_ptr_t)( const std::vector &); typedef sycl::event (*binary_inplace_row_matrix_broadcast_impl_fn_ptr_t)( - sycl::queue, + sycl::queue &, std::vector &, size_t, size_t, @@ -288,7 +288,7 @@ template sycl::event -binary_inplace_contig_impl(sycl::queue exec_q, +binary_inplace_contig_impl(sycl::queue &exec_q, size_t nelems, const char *rhs_p, py::ssize_t rhs_offset, @@ -324,7 +324,7 @@ template class kernel_name> sycl::event -binary_inplace_strided_impl(sycl::queue exec_q, +binary_inplace_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, @@ -361,7 +361,7 @@ template class kernel_name> sycl::event binary_inplace_row_matrix_broadcast_impl( - sycl::queue exec_q, + sycl::queue &exec_q, std::vector &host_tasks, size_t n0, size_t n1, @@ -419,7 +419,7 @@ sycl::event binary_inplace_row_matrix_broadcast_impl( sycl::event tmp_cleanup_ev = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(comp_ev); - sycl::context ctx = exec_q.get_context(); + const sycl::context &ctx = exec_q.get_context(); cgh.host_task([ctx, padded_vec]() { sycl::free(padded_vec, ctx); }); }); host_tasks.push_back(tmp_cleanup_ev); diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/conj.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/conj.hpp index 7000d83ff1..3b0a1584de 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/conj.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/conj.hpp @@ -114,7 +114,7 @@ template class conj_contig_kernel; template -sycl::event conj_contig_impl(sycl::queue exec_q, +sycl::event conj_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, @@ -155,7 +155,7 @@ template class conj_strided_kernel; template sycl::event -conj_strided_impl(sycl::queue exec_q, +conj_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cos.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cos.hpp index 4988cbdce0..f7c66d5f68 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cos.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cos.hpp @@ -189,7 +189,7 @@ template class cos_contig_kernel; template -sycl::event cos_contig_impl(sycl::queue exec_q, +sycl::event cos_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, @@ -229,7 +229,7 @@ template struct CosTypeMapFactory template class cos_strided_kernel; template -sycl::event cos_strided_impl(sycl::queue exec_q, +sycl::event cos_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cosh.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cosh.hpp index 1fb6bcc0c4..fbcc7e40f9 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cosh.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cosh.hpp @@ -177,7 +177,7 @@ template class cosh_contig_kernel; template -sycl::event cosh_contig_impl(sycl::queue exec_q, +sycl::event cosh_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, 
char *res_p, @@ -218,7 +218,7 @@ template class cosh_strided_kernel; template sycl::event -cosh_strided_impl(sycl::queue exec_q, +cosh_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/equal.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/equal.hpp index 7418bd1de9..cd726f72ea 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/equal.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/equal.hpp @@ -158,7 +158,7 @@ template -sycl::event equal_contig_impl(sycl::queue exec_q, +sycl::event equal_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg1_p, py::ssize_t arg1_offset, @@ -206,7 +206,7 @@ class equal_strided_kernel; template sycl::event -equal_strided_impl(sycl::queue exec_q, +equal_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/exp.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/exp.hpp index 9a02402381..003de44c27 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/exp.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/exp.hpp @@ -146,7 +146,7 @@ template class exp_contig_kernel; template -sycl::event exp_contig_impl(sycl::queue exec_q, +sycl::event exp_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, @@ -186,7 +186,7 @@ template struct ExpTypeMapFactory template class exp_strided_kernel; template -sycl::event exp_strided_impl(sycl::queue exec_q, +sycl::event exp_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp index b4bfecb22f..3f6a73b6d3 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp @@ -162,18 +162,11 @@ template struct Expm1OutputType td_ns::DefaultResultEntry>::result_type; }; -typedef sycl::event (*expm1_contig_impl_fn_ptr_t)( - sycl::queue, - size_t, - const char *, - char *, - const std::vector &); - template class expm1_contig_kernel; template -sycl::event expm1_contig_impl(sycl::queue exec_q, +sycl::event expm1_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, @@ -213,21 +206,9 @@ template struct Expm1TypeMapFactory template class expm1_strided_kernel; -typedef sycl::event (*expm1_strided_impl_fn_ptr_t)( - sycl::queue, - size_t, - int, - const py::ssize_t *, - const char *, - py::ssize_t, - char *, - py::ssize_t, - const std::vector &, - const std::vector &); - template sycl::event -expm1_strided_impl(sycl::queue exec_q, +expm1_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor.hpp index 9c16a805b3..e675407d0b 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor.hpp @@ -114,7 +114,7 @@ template class floor_contig_kernel; template -sycl::event floor_contig_impl(sycl::queue exec_q, +sycl::event floor_contig_impl(sycl::queue &exec_q, size_t 
nelems, const char *arg_p, char *res_p, @@ -155,7 +155,7 @@ template class floor_strided_kernel; template sycl::event -floor_strided_impl(sycl::queue exec_q, +floor_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp index e8d4f524fd..ad75924070 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp @@ -225,7 +225,7 @@ class floor_divide_contig_kernel; template sycl::event -floor_divide_contig_impl(sycl::queue exec_q, +floor_divide_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg1_p, py::ssize_t arg1_offset, @@ -276,7 +276,7 @@ class floor_divide_strided_kernel; template sycl::event -floor_divide_strided_impl(sycl::queue exec_q, +floor_divide_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/greater.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/greater.hpp index 23186b7633..2a151ce737 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/greater.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/greater.hpp @@ -171,7 +171,7 @@ template -sycl::event greater_contig_impl(sycl::queue exec_q, +sycl::event greater_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg1_p, py::ssize_t arg1_offset, @@ -219,7 +219,7 @@ class greater_strided_kernel; template sycl::event -greater_strided_impl(sycl::queue exec_q, +greater_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/greater_equal.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/greater_equal.hpp index 92b7189fc7..5704336990 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/greater_equal.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/greater_equal.hpp @@ -173,7 +173,7 @@ class greater_equal_contig_kernel; template sycl::event -greater_equal_contig_impl(sycl::queue exec_q, +greater_equal_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg1_p, py::ssize_t arg1_offset, @@ -225,7 +225,7 @@ class greater_equal_strided_kernel; template sycl::event -greater_equal_strided_impl(sycl::queue exec_q, +greater_equal_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/hypot.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/hypot.hpp index a12b84e4c6..a369c54f24 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/hypot.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/hypot.hpp @@ -124,7 +124,7 @@ template -sycl::event hypot_contig_impl(sycl::queue exec_q, +sycl::event hypot_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg1_p, py::ssize_t arg1_offset, @@ -173,7 +173,7 @@ class hypot_strided_kernel; template sycl::event -hypot_strided_impl(sycl::queue exec_q, +hypot_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/imag.hpp 
b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/imag.hpp index b094355ad0..64da603037 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/imag.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/imag.hpp @@ -113,7 +113,7 @@ template class imag_contig_kernel; template -sycl::event imag_contig_impl(sycl::queue exec_q, +sycl::event imag_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, @@ -154,7 +154,7 @@ template class imag_strided_kernel; template sycl::event -imag_strided_impl(sycl::queue exec_q, +imag_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/isfinite.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/isfinite.hpp index b075521fdb..1d8f177e40 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/isfinite.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/isfinite.hpp @@ -117,7 +117,7 @@ template class isfinite_contig_kernel; template -sycl::event isfinite_contig_impl(sycl::queue exec_q, +sycl::event isfinite_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, @@ -152,7 +152,7 @@ template class isfinite_strided_kernel; template sycl::event -isfinite_strided_impl(sycl::queue exec_q, +isfinite_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/isinf.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/isinf.hpp index 22a83b1128..d9afdb9317 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/isinf.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/isinf.hpp @@ -115,7 +115,7 @@ template class isinf_contig_kernel; template -sycl::event isinf_contig_impl(sycl::queue exec_q, +sycl::event isinf_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, @@ -149,7 +149,7 @@ template class isinf_strided_kernel; template sycl::event -isinf_strided_impl(sycl::queue exec_q, +isinf_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/isnan.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/isnan.hpp index da5e08a9ce..b5051ab833 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/isnan.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/isnan.hpp @@ -113,7 +113,7 @@ template class isnan_contig_kernel; template -sycl::event isnan_contig_impl(sycl::queue exec_q, +sycl::event isnan_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, @@ -147,7 +147,7 @@ template class isnan_strided_kernel; template sycl::event -isnan_strided_impl(sycl::queue exec_q, +isnan_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/less.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/less.hpp index 3562ee08d8..c33d6d7c10 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/less.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/less.hpp @@ -170,7 +170,7 @@ template -sycl::event less_contig_impl(sycl::queue exec_q, +sycl::event less_contig_impl(sycl::queue &exec_q, size_t nelems, 
const char *arg1_p, py::ssize_t arg1_offset, @@ -217,7 +217,7 @@ class less_strided_kernel; template sycl::event -less_strided_impl(sycl::queue exec_q, +less_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/less_equal.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/less_equal.hpp index a316f8abf3..47e2301fe7 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/less_equal.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/less_equal.hpp @@ -171,7 +171,7 @@ template -sycl::event less_equal_contig_impl(sycl::queue exec_q, +sycl::event less_equal_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg1_p, py::ssize_t arg1_offset, @@ -220,7 +220,7 @@ class less_equal_strided_kernel; template sycl::event -less_equal_strided_impl(sycl::queue exec_q, +less_equal_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log.hpp index 9fd366ba39..abcc899fc0 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log.hpp @@ -93,18 +93,11 @@ template struct LogOutputType td_ns::DefaultResultEntry>::result_type; }; -typedef sycl::event (*log_contig_impl_fn_ptr_t)( - sycl::queue, - size_t, - const char *, - char *, - const std::vector &); - template class log_contig_kernel; template -sycl::event log_contig_impl(sycl::queue exec_q, +sycl::event log_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, @@ -144,20 +137,8 @@ template struct LogTypeMapFactory template class log_strided_kernel; -typedef sycl::event (*log_strided_impl_fn_ptr_t)( - sycl::queue, - size_t, - int, - const py::ssize_t *, - const char *, - py::ssize_t, - char *, - py::ssize_t, - const std::vector &, - const std::vector &); - template -sycl::event log_strided_impl(sycl::queue exec_q, +sycl::event log_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log10.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log10.hpp index 8cb723f2b9..afcf8aa085 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log10.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log10.hpp @@ -120,18 +120,11 @@ template struct Log10OutputType td_ns::DefaultResultEntry>::result_type; }; -typedef sycl::event (*log10_contig_impl_fn_ptr_t)( - sycl::queue, - size_t, - const char *, - char *, - const std::vector &); - template class log10_contig_kernel; template -sycl::event log10_contig_impl(sycl::queue exec_q, +sycl::event log10_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, @@ -171,21 +164,9 @@ template struct Log10TypeMapFactory template class log10_strided_kernel; -typedef sycl::event (*log10_strided_impl_fn_ptr_t)( - sycl::queue, - size_t, - int, - const py::ssize_t *, - const char *, - py::ssize_t, - char *, - py::ssize_t, - const std::vector &, - const std::vector &); - template sycl::event -log10_strided_impl(sycl::queue exec_q, +log10_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git 
a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log1p.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log1p.hpp index f227760f46..6d7a56ccf5 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log1p.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log1p.hpp @@ -129,18 +129,11 @@ template struct Log1pOutputType td_ns::DefaultResultEntry>::result_type; }; -typedef sycl::event (*log1p_contig_impl_fn_ptr_t)( - sycl::queue, - size_t, - const char *, - char *, - const std::vector &); - template class log1p_contig_kernel; template -sycl::event log1p_contig_impl(sycl::queue exec_q, +sycl::event log1p_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, @@ -180,21 +173,9 @@ template struct Log1pTypeMapFactory template class log1p_strided_kernel; -typedef sycl::event (*log1p_strided_impl_fn_ptr_t)( - sycl::queue, - size_t, - int, - const py::ssize_t *, - const char *, - py::ssize_t, - char *, - py::ssize_t, - const std::vector &, - const std::vector &); - template sycl::event -log1p_strided_impl(sycl::queue exec_q, +log1p_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log2.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log2.hpp index e7ae2911ac..533d0120df 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log2.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log2.hpp @@ -116,18 +116,11 @@ template struct Log2OutputType td_ns::DefaultResultEntry>::result_type; }; -typedef sycl::event (*log2_contig_impl_fn_ptr_t)( - sycl::queue, - size_t, - const char *, - char *, - const std::vector &); - template class log2_contig_kernel; template -sycl::event log2_contig_impl(sycl::queue exec_q, +sycl::event log2_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, @@ -167,21 +160,9 @@ template struct Log2TypeMapFactory template class log2_strided_kernel; -typedef sycl::event (*log2_strided_impl_fn_ptr_t)( - sycl::queue, - size_t, - int, - const py::ssize_t *, - const char *, - py::ssize_t, - char *, - py::ssize_t, - const std::vector &, - const std::vector &); - template sycl::event -log2_strided_impl(sycl::queue exec_q, +log2_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp index 0721b7582b..90b7997a37 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp @@ -157,7 +157,7 @@ template -sycl::event logaddexp_contig_impl(sycl::queue exec_q, +sycl::event logaddexp_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg1_p, py::ssize_t arg1_offset, @@ -207,7 +207,7 @@ class logaddexp_strided_kernel; template sycl::event -logaddexp_strided_impl(sycl::queue exec_q, +logaddexp_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logical_and.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logical_and.hpp index 7e5091c04c..10e4e0cbff 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logical_and.hpp +++ 
b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logical_and.hpp @@ -166,7 +166,7 @@ class logical_and_contig_kernel; template sycl::event -logical_and_contig_impl(sycl::queue exec_q, +logical_and_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg1_p, py::ssize_t arg1_offset, @@ -217,7 +217,7 @@ class logical_and_strided_kernel; template sycl::event -logical_and_strided_impl(sycl::queue exec_q, +logical_and_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logical_not.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logical_not.hpp index 1062950461..78bacbe686 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logical_not.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logical_not.hpp @@ -93,7 +93,7 @@ class logical_not_contig_kernel; template sycl::event -logical_not_contig_impl(sycl::queue exec_q, +logical_not_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, @@ -129,7 +129,7 @@ class logical_not_strided_kernel; template sycl::event -logical_not_strided_impl(sycl::queue exec_q, +logical_not_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logical_or.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logical_or.hpp index 88e17d37cd..bfb1288870 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logical_or.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logical_or.hpp @@ -164,7 +164,7 @@ template -sycl::event logical_or_contig_impl(sycl::queue exec_q, +sycl::event logical_or_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg1_p, py::ssize_t arg1_offset, @@ -214,7 +214,7 @@ class logical_or_strided_kernel; template sycl::event -logical_or_strided_impl(sycl::queue exec_q, +logical_or_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logical_xor.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logical_xor.hpp index 5ba2ca6bdb..44d361cfc1 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logical_xor.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logical_xor.hpp @@ -167,7 +167,7 @@ class logical_xor_contig_kernel; template sycl::event -logical_xor_contig_impl(sycl::queue exec_q, +logical_xor_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg1_p, py::ssize_t arg1_offset, @@ -218,7 +218,7 @@ class logical_xor_strided_kernel; template sycl::event -logical_xor_strided_impl(sycl::queue exec_q, +logical_xor_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/maximum.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/maximum.hpp index 57fcd46102..324f3f5ad2 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/maximum.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/maximum.hpp @@ -188,7 +188,7 @@ template -sycl::event maximum_contig_impl(sycl::queue exec_q, +sycl::event maximum_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg1_p, py::ssize_t arg1_offset, @@ -237,7 +237,7 @@ class 
maximum_strided_kernel; template sycl::event -maximum_strided_impl(sycl::queue exec_q, +maximum_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/minimum.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/minimum.hpp index 0ae1d056d0..9a7ec72e56 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/minimum.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/minimum.hpp @@ -188,7 +188,7 @@ template -sycl::event minimum_contig_impl(sycl::queue exec_q, +sycl::event minimum_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg1_p, py::ssize_t arg1_offset, @@ -237,7 +237,7 @@ class minimum_strided_kernel; template sycl::event -minimum_strided_impl(sycl::queue exec_q, +minimum_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/multiply.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/multiply.hpp index aa416d196d..c316279a76 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/multiply.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/multiply.hpp @@ -176,7 +176,7 @@ template -sycl::event multiply_contig_impl(sycl::queue exec_q, +sycl::event multiply_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg1_p, py::ssize_t arg1_offset, @@ -226,7 +226,7 @@ class multiply_strided_kernel; template sycl::event -multiply_strided_impl(sycl::queue exec_q, +multiply_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, @@ -277,7 +277,7 @@ using MultiplyContigMatrixContigRowBroadcastingFunctor = template sycl::event multiply_contig_matrix_contig_row_broadcast_impl( - sycl::queue exec_q, + sycl::queue &exec_q, std::vector &host_tasks, size_t n0, size_t n1, @@ -327,7 +327,7 @@ struct MultiplyContigMatrixContigRowBroadcastFactory template sycl::event multiply_contig_row_contig_matrix_broadcast_impl( - sycl::queue exec_q, + sycl::queue &exec_q, std::vector &host_tasks, size_t n0, size_t n1, @@ -422,7 +422,7 @@ class multiply_inplace_contig_kernel; template sycl::event -multiply_inplace_contig_impl(sycl::queue exec_q, +multiply_inplace_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, py::ssize_t arg_offset, @@ -460,7 +460,7 @@ class multiply_inplace_strided_kernel; template sycl::event multiply_inplace_strided_impl( - sycl::queue exec_q, + sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, @@ -509,7 +509,7 @@ using MultiplyInplaceRowMatrixBroadcastingFunctor = template sycl::event multiply_inplace_row_matrix_broadcast_impl( - sycl::queue exec_q, + sycl::queue &exec_q, std::vector &host_tasks, size_t n0, size_t n1, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/negative.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/negative.hpp index 4cfc231d7a..cbeeb60b7c 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/negative.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/negative.hpp @@ -97,15 +97,8 @@ template struct NegativeOutputType template class negative_contig_kernel; -typedef sycl::event (*negative_contig_impl_fn_ptr_t)( - sycl::queue, - size_t, - const char *, - char *, - const std::vector &); - template -sycl::event negative_contig_impl(sycl::queue exec_q, 
+sycl::event negative_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, @@ -150,21 +143,9 @@ using NegativeStridedFunctor = elementwise_common:: template class negative_strided_kernel; -typedef sycl::event (*negative_strided_impl_fn_ptr_t)( - sycl::queue, - size_t, - int, - const py::ssize_t *, - const char *, - py::ssize_t, - char *, - py::ssize_t, - const std::vector &, - const std::vector &); - template sycl::event -negative_strided_impl(sycl::queue exec_q, +negative_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/not_equal.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/not_equal.hpp index 7d408e7f5c..88e077b402 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/not_equal.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/not_equal.hpp @@ -174,7 +174,7 @@ template -sycl::event not_equal_contig_impl(sycl::queue exec_q, +sycl::event not_equal_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg1_p, py::ssize_t arg1_offset, @@ -223,7 +223,7 @@ class not_equal_strided_kernel; template sycl::event -not_equal_strided_impl(sycl::queue exec_q, +not_equal_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/positive.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/positive.hpp index 2d81a0cc48..cbeba2e91d 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/positive.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/positive.hpp @@ -112,15 +112,8 @@ template struct PositiveOutputType template class positive_contig_kernel; -typedef sycl::event (*positive_contig_impl_fn_ptr_t)( - sycl::queue, - size_t, - const char *, - char *, - const std::vector &); - template -sycl::event positive_contig_impl(sycl::queue exec_q, +sycl::event positive_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, @@ -165,21 +158,9 @@ using PositiveStridedFunctor = elementwise_common:: template class positive_strided_kernel; -typedef sycl::event (*positive_strided_impl_fn_ptr_t)( - sycl::queue, - size_t, - int, - const py::ssize_t *, - const char *, - py::ssize_t, - char *, - py::ssize_t, - const std::vector &, - const std::vector &); - template sycl::event -positive_strided_impl(sycl::queue exec_q, +positive_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/pow.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/pow.hpp index e3b44f3191..ba9241b8db 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/pow.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/pow.hpp @@ -204,7 +204,7 @@ template -sycl::event pow_contig_impl(sycl::queue exec_q, +sycl::event pow_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg1_p, py::ssize_t arg1_offset, @@ -251,7 +251,7 @@ template class pow_strided_kernel; template -sycl::event pow_strided_impl(sycl::queue exec_q, +sycl::event pow_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/proj.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/proj.hpp index 
b8ad1042ff..dcaa4b0f5f 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/proj.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/proj.hpp @@ -114,7 +114,7 @@ template class proj_contig_kernel; template -sycl::event proj_contig_impl(sycl::queue exec_q, +sycl::event proj_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, @@ -127,7 +127,7 @@ sycl::event proj_contig_impl(sycl::queue exec_q, template sycl::event -proj_workaround_contig_impl(sycl::queue exec_q, +proj_workaround_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, @@ -185,7 +185,7 @@ template class proj_strided_kernel; template sycl::event -proj_strided_impl(sycl::queue exec_q, +proj_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/real.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/real.hpp index 3faf0ce553..294b796e96 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/real.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/real.hpp @@ -113,7 +113,7 @@ template class real_contig_kernel; template -sycl::event real_contig_impl(sycl::queue exec_q, +sycl::event real_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, @@ -154,7 +154,7 @@ template class real_strided_kernel; template sycl::event -real_strided_impl(sycl::queue exec_q, +real_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/remainder.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/remainder.hpp index 54bc42c7c6..6cd306a900 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/remainder.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/remainder.hpp @@ -226,7 +226,7 @@ template -sycl::event remainder_contig_impl(sycl::queue exec_q, +sycl::event remainder_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg1_p, py::ssize_t arg1_offset, @@ -275,7 +275,7 @@ class remainder_strided_kernel; template sycl::event -remainder_strided_impl(sycl::queue exec_q, +remainder_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/round.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/round.hpp index 8fceacfcef..84d8fb7252 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/round.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/round.hpp @@ -125,7 +125,7 @@ template class round_contig_kernel; template -sycl::event round_contig_impl(sycl::queue exec_q, +sycl::event round_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, @@ -166,7 +166,7 @@ template class round_strided_kernel; template sycl::event -round_strided_impl(sycl::queue exec_q, +round_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sign.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sign.hpp index b59b0c49ad..fc3d44dcfa 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sign.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sign.hpp @@ -128,15 +128,8 @@ 
template struct SignOutputType template class sign_contig_kernel; -typedef sycl::event (*sign_contig_impl_fn_ptr_t)( - sycl::queue, - size_t, - const char *, - char *, - const std::vector &); - template -sycl::event sign_contig_impl(sycl::queue exec_q, +sycl::event sign_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, @@ -180,21 +173,9 @@ using SignStridedFunctor = elementwise_common:: template class sign_strided_kernel; -typedef sycl::event (*sign_strided_impl_fn_ptr_t)( - sycl::queue, - size_t, - int, - const py::ssize_t *, - const char *, - py::ssize_t, - char *, - py::ssize_t, - const std::vector &, - const std::vector &); - template sycl::event -sign_strided_impl(sycl::queue exec_q, +sign_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/signbit.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/signbit.hpp index 98b9df0716..0f509f7950 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/signbit.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/signbit.hpp @@ -101,7 +101,7 @@ template class signbit_contig_kernel; template -sycl::event signbit_contig_impl(sycl::queue exec_q, +sycl::event signbit_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, @@ -142,7 +142,7 @@ template class signbit_strided_kernel; template sycl::event -signbit_strided_impl(sycl::queue exec_q, +signbit_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sin.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sin.hpp index 70ebf053ff..b9f03e6234 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sin.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sin.hpp @@ -204,7 +204,7 @@ template class sin_contig_kernel; template -sycl::event sin_contig_impl(sycl::queue exec_q, +sycl::event sin_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, @@ -244,7 +244,7 @@ template struct SinTypeMapFactory template class sin_strided_kernel; template -sycl::event sin_strided_impl(sycl::queue exec_q, +sycl::event sin_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sinh.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sinh.hpp index 9ce2925246..3a8d05d774 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sinh.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sinh.hpp @@ -180,7 +180,7 @@ template class sinh_contig_kernel; template -sycl::event sinh_contig_impl(sycl::queue exec_q, +sycl::event sinh_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, @@ -221,7 +221,7 @@ template class sinh_strided_kernel; template sycl::event -sinh_strided_impl(sycl::queue exec_q, +sinh_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sqrt.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sqrt.hpp index 40fda0e147..efa580d70e 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sqrt.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sqrt.hpp 
@@ -279,7 +279,7 @@ template class sqrt_contig_kernel; template -sycl::event sqrt_contig_impl(sycl::queue exec_q, +sycl::event sqrt_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, @@ -320,7 +320,7 @@ template class sqrt_strided_kernel; template sycl::event -sqrt_strided_impl(sycl::queue exec_q, +sqrt_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/square.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/square.hpp index 517cf2dc26..6b5f372c3d 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/square.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/square.hpp @@ -126,7 +126,7 @@ template class square_contig_kernel; template -sycl::event square_contig_impl(sycl::queue exec_q, +sycl::event square_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, @@ -167,7 +167,7 @@ template class square_strided_kernel; template sycl::event -square_strided_impl(sycl::queue exec_q, +square_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/subtract.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/subtract.hpp index 652308b2c4..3eb8420933 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/subtract.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/subtract.hpp @@ -174,7 +174,7 @@ template -sycl::event subtract_contig_impl(sycl::queue exec_q, +sycl::event subtract_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg1_p, py::ssize_t arg1_offset, @@ -223,7 +223,7 @@ class subtract_strided_kernel; template sycl::event -subtract_strided_impl(sycl::queue exec_q, +subtract_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, @@ -285,7 +285,7 @@ class subtract_row_matrix_broadcast_sg_krn; template sycl::event subtract_contig_matrix_contig_row_broadcast_impl( - sycl::queue exec_q, + sycl::queue &exec_q, std::vector &host_tasks, size_t n0, size_t n1, @@ -338,7 +338,7 @@ struct SubtractContigMatrixContigRowBroadcastFactory template sycl::event subtract_contig_row_contig_matrix_broadcast_impl( - sycl::queue exec_q, + sycl::queue &exec_q, std::vector &host_tasks, size_t n0, size_t n1, @@ -435,7 +435,7 @@ class subtract_inplace_contig_kernel; template sycl::event -subtract_inplace_contig_impl(sycl::queue exec_q, +subtract_inplace_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, py::ssize_t arg_offset, @@ -473,7 +473,7 @@ class subtract_inplace_strided_kernel; template sycl::event subtract_inplace_strided_impl( - sycl::queue exec_q, + sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, @@ -522,7 +522,7 @@ using SubtractInplaceRowMatrixBroadcastingFunctor = template sycl::event subtract_inplace_row_matrix_broadcast_impl( - sycl::queue exec_q, + sycl::queue &exec_q, std::vector &host_tasks, size_t n0, size_t n1, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/tan.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/tan.hpp index 25bcea68e3..45f931b7f4 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/tan.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/tan.hpp @@ -155,7 +155,7 @@ template class tan_contig_kernel; template 
-sycl::event tan_contig_impl(sycl::queue exec_q, +sycl::event tan_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, @@ -195,7 +195,7 @@ template struct TanTypeMapFactory template class tan_strided_kernel; template -sycl::event tan_strided_impl(sycl::queue exec_q, +sycl::event tan_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/tanh.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/tanh.hpp index 41f5b5bd5e..ef943319b2 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/tanh.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/tanh.hpp @@ -149,7 +149,7 @@ template class tanh_contig_kernel; template -sycl::event tanh_contig_impl(sycl::queue exec_q, +sycl::event tanh_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, @@ -190,7 +190,7 @@ template class tanh_strided_kernel; template sycl::event -tanh_strided_impl(sycl::queue exec_q, +tanh_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/true_divide.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/true_divide.hpp index 89e5f5c67c..9f488e6598 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/true_divide.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/true_divide.hpp @@ -156,7 +156,7 @@ class true_divide_contig_kernel; template sycl::event -true_divide_contig_impl(sycl::queue exec_q, +true_divide_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg1_p, py::ssize_t arg1_offset, @@ -206,7 +206,7 @@ class true_divide_strided_kernel; template sycl::event -true_divide_strided_impl(sycl::queue exec_q, +true_divide_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, @@ -269,7 +269,7 @@ class true_divide_row_matrix_broadcast_sg_krn; template sycl::event true_divide_contig_matrix_contig_row_broadcast_impl( - sycl::queue exec_q, + sycl::queue &exec_q, std::vector &host_tasks, size_t n0, size_t n1, @@ -322,7 +322,7 @@ struct TrueDivideContigMatrixContigRowBroadcastFactory template sycl::event true_divide_contig_row_contig_matrix_broadcast_impl( - sycl::queue exec_q, + sycl::queue &exec_q, std::vector &host_tasks, size_t n0, size_t n1, diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/trunc.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/trunc.hpp index 5fb4074ec9..33e942dd6a 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/trunc.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/trunc.hpp @@ -111,7 +111,7 @@ template class trunc_contig_kernel; template -sycl::event trunc_contig_impl(sycl::queue exec_q, +sycl::event trunc_contig_impl(sycl::queue &exec_q, size_t nelems, const char *arg_p, char *res_p, @@ -152,7 +152,7 @@ template class trunc_strided_kernel; template sycl::event -trunc_strided_impl(sycl::queue exec_q, +trunc_strided_impl(sycl::queue &exec_q, size_t nelems, int nd, const py::ssize_t *shape_and_strides, diff --git a/dpctl/tensor/libtensor/include/kernels/integer_advanced_indexing.hpp b/dpctl/tensor/libtensor/include/kernels/integer_advanced_indexing.hpp index 0f60c7a4b2..6acf0a9f50 100644 --- a/dpctl/tensor/libtensor/include/kernels/integer_advanced_indexing.hpp +++ 
b/dpctl/tensor/libtensor/include/kernels/integer_advanced_indexing.hpp @@ -155,7 +155,7 @@ class TakeFunctor } }; -typedef sycl::event (*take_fn_ptr_t)(sycl::queue, +typedef sycl::event (*take_fn_ptr_t)(sycl::queue &, size_t, size_t, int, @@ -173,7 +173,7 @@ typedef sycl::event (*take_fn_ptr_t)(sycl::queue, const std::vector &); template -sycl::event take_impl(sycl::queue q, +sycl::event take_impl(sycl::queue &q, size_t orthog_nelems, size_t ind_nelems, int nd, @@ -285,7 +285,7 @@ class PutFunctor } }; -typedef sycl::event (*put_fn_ptr_t)(sycl::queue, +typedef sycl::event (*put_fn_ptr_t)(sycl::queue &, size_t, size_t, int, @@ -303,7 +303,7 @@ typedef sycl::event (*put_fn_ptr_t)(sycl::queue, const std::vector &); template -sycl::event put_impl(sycl::queue q, +sycl::event put_impl(sycl::queue &q, size_t orthog_nelems, size_t ind_nelems, int nd, diff --git a/dpctl/tensor/libtensor/include/kernels/reductions.hpp b/dpctl/tensor/libtensor/include/kernels/reductions.hpp index fd3fcc9681..a830efb393 100644 --- a/dpctl/tensor/libtensor/include/kernels/reductions.hpp +++ b/dpctl/tensor/libtensor/include/kernels/reductions.hpp @@ -207,7 +207,7 @@ struct ReductionOverGroupWithAtomicFunctor }; typedef sycl::event (*sum_reduction_strided_impl_fn_ptr)( - sycl::queue, + sycl::queue &, size_t, size_t, const char *, @@ -243,7 +243,7 @@ using dpctl::tensor::sycl_utils::choose_workgroup_size; template sycl::event sum_reduction_over_group_with_atomics_strided_impl( - sycl::queue exec_q, + sycl::queue &exec_q, size_t iter_nelems, // number of reductions (num. of rows in a matrix // when reducing over rows) size_t reduction_nelems, // size of each reduction (length of rows, i.e. @@ -367,7 +367,7 @@ sycl::event sum_reduction_over_group_with_atomics_strided_impl( // Contig typedef sycl::event (*sum_reduction_contig_impl_fn_ptr)( - sycl::queue, + sycl::queue &, size_t, size_t, const char *, @@ -380,7 +380,7 @@ typedef sycl::event (*sum_reduction_contig_impl_fn_ptr)( /* @brief Reduce rows in a matrix */ template sycl::event sum_reduction_axis1_over_group_with_atomics_contig_impl( - sycl::queue exec_q, + sycl::queue &exec_q, size_t iter_nelems, // number of reductions (num. of rows in a matrix // when reducing over rows) size_t reduction_nelems, // size of each reduction (length of rows, i.e. @@ -491,7 +491,7 @@ sycl::event sum_reduction_axis1_over_group_with_atomics_contig_impl( /* @brief Reduce rows in a matrix */ template sycl::event sum_reduction_axis0_over_group_with_atomics_contig_impl( - sycl::queue exec_q, + sycl::queue &exec_q, size_t iter_nelems, // number of reductions (num. of cols in a matrix // when reducing over cols) size_t reduction_nelems, // size of each reduction (length of cols, i.e. @@ -662,7 +662,7 @@ class sum_reduction_over_group_temps_krn; template sycl::event sum_reduction_over_group_temps_strided_impl( - sycl::queue exec_q, + sycl::queue &exec_q, size_t iter_nelems, // number of reductions (num. of rows in a matrix // when reducing over rows) size_t reduction_nelems, // size of each reduction (length of rows, i.e. 
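The change repeated across these kernel headers is uniform: every dispatch-table typedef and every *_impl function that previously accepted sycl::queue by value now accepts sycl::queue &, so the shared (reference-counted) queue handle is no longer copied on each call made through a dispatch table. The sketch below is not taken from the patch; it only illustrates the shape of that signature change with hypothetical names (unary_contig_fn_ptr_t, zero_fill_impl) and assumes a SYCL 2020 compiler and a USM-allocated destination pointer.

// Hypothetical sketch of the dispatch-table signature pattern; not patch code.
#include <cstddef>
#include <vector>
#include <sycl/sycl.hpp>

// Function-pointer type stored in a per-dtype dispatch vector.
// Taking sycl::queue by reference (rather than by value) avoids copying the
// queue handle every time a kernel is launched through the table.
typedef sycl::event (*unary_contig_fn_ptr_t)(
    sycl::queue &,                      // was: sycl::queue (by value)
    std::size_t,                        // number of elements
    const char *,                       // source data (unused here)
    char *,                             // destination data
    const std::vector<sycl::event> &);  // dependent events

// A concrete implementation whose address can populate such a table.
// dst_p is assumed to point to USM memory accessible on the device.
template <typename T>
sycl::event zero_fill_impl(sycl::queue &exec_q,
                           std::size_t nelems,
                           const char *,
                           char *dst_p,
                           const std::vector<sycl::event> &depends)
{
    return exec_q.submit([&](sycl::handler &cgh) {
        cgh.depends_on(depends);
        T *dst_tp = reinterpret_cast<T *>(dst_p);
        cgh.parallel_for(sycl::range<1>(nelems),
                         [=](sycl::id<1> i) { dst_tp[i] = T(0); });
    });
}

// An instantiation converts to the function-pointer type declared above.
static unary_contig_fn_ptr_t fill_fn = zero_fill_impl<float>;

With the reference signature in place, call sites that previously forwarded the queue as fn(std::move(exec_q), ...) reduce to plain fn(exec_q, ...); the [PATCH 3/9] hunks that follow make exactly that simplification in the Python-binding lambdas.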
diff --git a/dpctl/tensor/libtensor/include/kernels/repeat.hpp b/dpctl/tensor/libtensor/include/kernels/repeat.hpp index 4129f358df..da1989fc3c 100644 --- a/dpctl/tensor/libtensor/include/kernels/repeat.hpp +++ b/dpctl/tensor/libtensor/include/kernels/repeat.hpp @@ -107,7 +107,7 @@ class RepeatSequenceFunctor }; typedef sycl::event (*repeat_by_sequence_fn_ptr_t)( - sycl::queue, + sycl::queue &, size_t, size_t, const char *, @@ -128,7 +128,7 @@ typedef sycl::event (*repeat_by_sequence_fn_ptr_t)( template sycl::event -repeat_by_sequence_impl(sycl::queue q, +repeat_by_sequence_impl(sycl::queue &q, size_t orthog_nelems, size_t src_axis_nelems, const char *src_cp, @@ -191,7 +191,7 @@ template struct RepeatSequenceFactory }; typedef sycl::event (*repeat_by_sequence_1d_fn_ptr_t)( - sycl::queue, + sycl::queue &, size_t, const char *, char *, @@ -206,7 +206,7 @@ typedef sycl::event (*repeat_by_sequence_1d_fn_ptr_t)( const std::vector &); template -sycl::event repeat_by_sequence_1d_impl(sycl::queue q, +sycl::event repeat_by_sequence_1d_impl(sycl::queue &q, size_t src_nelems, const char *src_cp, char *dst_cp, @@ -306,7 +306,7 @@ class RepeatScalarFunctor }; typedef sycl::event (*repeat_by_scalar_fn_ptr_t)( - sycl::queue, + sycl::queue &, size_t, size_t, const char *, @@ -323,7 +323,7 @@ typedef sycl::event (*repeat_by_scalar_fn_ptr_t)( const std::vector &); template -sycl::event repeat_by_scalar_impl(sycl::queue q, +sycl::event repeat_by_scalar_impl(sycl::queue &q, size_t orthog_nelems, size_t dst_axis_nelems, const char *src_cp, @@ -375,7 +375,7 @@ template struct RepeatScalarFactory }; typedef sycl::event (*repeat_by_scalar_1d_fn_ptr_t)( - sycl::queue, + sycl::queue &, size_t, const char *, char *, @@ -387,7 +387,7 @@ typedef sycl::event (*repeat_by_scalar_1d_fn_ptr_t)( const std::vector &); template -sycl::event repeat_by_scalar_1d_impl(sycl::queue q, +sycl::event repeat_by_scalar_1d_impl(sycl::queue &q, size_t dst_nelems, const char *src_cp, char *dst_cp, diff --git a/dpctl/tensor/libtensor/include/kernels/where.hpp b/dpctl/tensor/libtensor/include/kernels/where.hpp index 9da5466dbe..fc9546a9a8 100644 --- a/dpctl/tensor/libtensor/include/kernels/where.hpp +++ b/dpctl/tensor/libtensor/include/kernels/where.hpp @@ -142,7 +142,7 @@ class WhereContigFunctor }; typedef sycl::event (*where_contig_impl_fn_ptr_t)( - sycl::queue, + sycl::queue &, size_t, const char *, const char *, @@ -151,7 +151,7 @@ typedef sycl::event (*where_contig_impl_fn_ptr_t)( const std::vector &); template -sycl::event where_contig_impl(sycl::queue q, +sycl::event where_contig_impl(sycl::queue &q, size_t nelems, const char *cond_cp, const char *x1_cp, @@ -221,7 +221,7 @@ class WhereStridedFunctor }; typedef sycl::event (*where_strided_impl_fn_ptr_t)( - sycl::queue, + sycl::queue &, size_t, int, const char *, @@ -236,7 +236,7 @@ typedef sycl::event (*where_strided_impl_fn_ptr_t)( const std::vector &); template -sycl::event where_strided_impl(sycl::queue q, +sycl::event where_strided_impl(sycl::queue &q, size_t nelems, int nd, const char *cond_cp, From 0370b09b0cb6d96ca507e6d8ae4b67ca3c92acd5 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 25 Sep 2023 20:20:54 -0500 Subject: [PATCH 3/9] Used sycl::queue & for signatures in Python bindings --- .../tensor/libtensor/source/accumulators.cpp | 9 +- .../tensor/libtensor/source/accumulators.hpp | 4 +- .../source/boolean_advanced_indexing.cpp | 8 +- .../source/boolean_advanced_indexing.hpp | 8 +- .../libtensor/source/boolean_reductions.cpp | 8 +- 
.../libtensor/source/boolean_reductions.hpp | 2 +- .../source/copy_and_cast_usm_to_usm.cpp | 2 +- .../source/copy_and_cast_usm_to_usm.hpp | 2 +- .../libtensor/source/copy_for_reshape.cpp | 2 +- .../libtensor/source/copy_for_reshape.hpp | 2 +- .../tensor/libtensor/source/copy_for_roll.cpp | 4 +- .../tensor/libtensor/source/copy_for_roll.hpp | 4 +- .../copy_numpy_ndarray_into_usm_ndarray.cpp | 2 +- .../copy_numpy_ndarray_into_usm_ndarray.hpp | 2 +- .../source/elementwise_functions.cpp | 352 ++++++++---------- dpctl/tensor/libtensor/source/eye_ctor.cpp | 2 +- dpctl/tensor/libtensor/source/eye_ctor.hpp | 2 +- dpctl/tensor/libtensor/source/full_ctor.cpp | 2 +- dpctl/tensor/libtensor/source/full_ctor.hpp | 2 +- .../source/integer_advanced_indexing.cpp | 6 +- .../source/integer_advanced_indexing.hpp | 4 +- .../libtensor/source/linear_sequences.cpp | 4 +- .../libtensor/source/linear_sequences.hpp | 4 +- dpctl/tensor/libtensor/source/repeat.cpp | 4 +- dpctl/tensor/libtensor/source/repeat.hpp | 4 +- .../libtensor/source/sum_reductions.cpp | 4 +- dpctl/tensor/libtensor/source/triul_ctor.cpp | 2 +- dpctl/tensor/libtensor/source/triul_ctor.hpp | 2 +- dpctl/tensor/libtensor/source/where.cpp | 2 +- dpctl/tensor/libtensor/source/where.hpp | 2 +- 30 files changed, 209 insertions(+), 248 deletions(-) diff --git a/dpctl/tensor/libtensor/source/accumulators.cpp b/dpctl/tensor/libtensor/source/accumulators.cpp index e52283ef24..40f4424ef9 100644 --- a/dpctl/tensor/libtensor/source/accumulators.cpp +++ b/dpctl/tensor/libtensor/source/accumulators.cpp @@ -99,7 +99,7 @@ void populate_mask_positions_dispatch_vectors(void) size_t py_mask_positions(const dpctl::tensor::usm_ndarray &mask, const dpctl::tensor::usm_ndarray &cumsum, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends) { // cumsum is 1D @@ -155,8 +155,7 @@ size_t py_mask_positions(const dpctl::tensor::usm_ndarray &mask, ? 
mask_positions_contig_i32_dispatch_vector[mask_typeid] : mask_positions_contig_i64_dispatch_vector[mask_typeid]; - return fn(std::move(exec_q), mask_size, mask_data, cumsum_data, - depends); + return fn(exec_q, mask_size, mask_data, cumsum_data, depends); } const py::ssize_t *shape = mask.get_shape_raw(); @@ -236,7 +235,7 @@ void populate_cumsum_1d_dispatch_vectors(void) size_t py_cumsum_1d(const dpctl::tensor::usm_ndarray &src, const dpctl::tensor::usm_ndarray &cumsum, - sycl::queue exec_q, + sycl::queue &exec_q, std::vector const &depends) { // cumsum is 1D @@ -291,7 +290,7 @@ size_t py_cumsum_1d(const dpctl::tensor::usm_ndarray &src, "this cumsum requires integer type, got src_typeid=" + std::to_string(src_typeid)); } - return fn(std::move(exec_q), src_size, src_data, cumsum_data, depends); + return fn(exec_q, src_size, src_data, cumsum_data, depends); } const py::ssize_t *shape = src.get_shape_raw(); diff --git a/dpctl/tensor/libtensor/source/accumulators.hpp b/dpctl/tensor/libtensor/source/accumulators.hpp index e3cdb035bc..4979eab54f 100644 --- a/dpctl/tensor/libtensor/source/accumulators.hpp +++ b/dpctl/tensor/libtensor/source/accumulators.hpp @@ -41,14 +41,14 @@ extern void populate_mask_positions_dispatch_vectors(void); extern size_t py_mask_positions(const dpctl::tensor::usm_ndarray &mask, const dpctl::tensor::usm_ndarray &cumsum, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}); extern void populate_cumsum_1d_dispatch_vectors(void); extern size_t py_cumsum_1d(const dpctl::tensor::usm_ndarray &src, const dpctl::tensor::usm_ndarray &cumsum, - sycl::queue exec_q, + sycl::queue &exec_q, std::vector const &depends = {}); } // namespace py_internal diff --git a/dpctl/tensor/libtensor/source/boolean_advanced_indexing.cpp b/dpctl/tensor/libtensor/source/boolean_advanced_indexing.cpp index 1bbb4109b6..226272536a 100644 --- a/dpctl/tensor/libtensor/source/boolean_advanced_indexing.cpp +++ b/dpctl/tensor/libtensor/source/boolean_advanced_indexing.cpp @@ -114,8 +114,8 @@ py_extract(const dpctl::tensor::usm_ndarray &src, const dpctl::tensor::usm_ndarray &cumsum, int axis_start, // axis_start <= mask_i < axis_end int axis_end, - dpctl::tensor::usm_ndarray dst, - sycl::queue exec_q, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, const std::vector &depends) { int src_nd = src.get_ndim(); @@ -449,7 +449,7 @@ py_place(const dpctl::tensor::usm_ndarray &dst, int axis_start, // axis_start <= mask_i < axis_end int axis_end, const dpctl::tensor::usm_ndarray &rhs, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends) { int dst_nd = dst.get_ndim(); @@ -718,7 +718,7 @@ py_nonzero(const dpctl::tensor::usm_ndarray &indexes, // int32/int64 2D output array, C-contiguous const std::vector &mask_shape, // shape of array from which cumsum was computed - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends) { if (!dpctl::utils::queues_are_compatible(exec_q, {cumsum, indexes})) { diff --git a/dpctl/tensor/libtensor/source/boolean_advanced_indexing.hpp b/dpctl/tensor/libtensor/source/boolean_advanced_indexing.hpp index cc920477fb..26f1c6a646 100644 --- a/dpctl/tensor/libtensor/source/boolean_advanced_indexing.hpp +++ b/dpctl/tensor/libtensor/source/boolean_advanced_indexing.hpp @@ -43,8 +43,8 @@ py_extract(const dpctl::tensor::usm_ndarray &src, const dpctl::tensor::usm_ndarray &cumsum, int axis_start, // axis_start <= mask_i < axis_end int axis_end, - dpctl::tensor::usm_ndarray dst, - sycl::queue exec_q, + const 
dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, const std::vector &depends = {}); extern void populate_masked_extract_dispatch_vectors(void); @@ -55,7 +55,7 @@ py_place(const dpctl::tensor::usm_ndarray &dst, int axis_start, // axis_start <= mask_i < axis_end int axis_end, const dpctl::tensor::usm_ndarray &rhs, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}); extern void populate_masked_place_dispatch_vectors(void); @@ -67,7 +67,7 @@ py_nonzero(const dpctl::tensor::usm_ndarray &indexes, // int32 2D output array, C-contiguous const std::vector &mask_shape, // shape of array from which cumsum was computed - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}); } // namespace py_internal diff --git a/dpctl/tensor/libtensor/source/boolean_reductions.cpp b/dpctl/tensor/libtensor/source/boolean_reductions.cpp index 9840b1b912..5f3c1f5e51 100644 --- a/dpctl/tensor/libtensor/source/boolean_reductions.cpp +++ b/dpctl/tensor/libtensor/source/boolean_reductions.cpp @@ -149,10 +149,10 @@ void init_boolean_reduction_functions(py::module_ m) using impl::all_reduction_strided_dispatch_vector; auto all_pyapi = [&](const arrayT &src, int trailing_dims_to_reduce, - const arrayT &dst, sycl::queue exec_q, + const arrayT &dst, sycl::queue &exec_q, const event_vecT &depends = {}) { return py_boolean_reduction( - src, trailing_dims_to_reduce, dst, std::move(exec_q), depends, + src, trailing_dims_to_reduce, dst, exec_q, depends, all_reduction_axis1_contig_dispatch_vector, all_reduction_axis0_contig_dispatch_vector, all_reduction_strided_dispatch_vector); @@ -170,10 +170,10 @@ void init_boolean_reduction_functions(py::module_ m) using impl::any_reduction_strided_dispatch_vector; auto any_pyapi = [&](const arrayT &src, int trailing_dims_to_reduce, - const arrayT &dst, sycl::queue exec_q, + const arrayT &dst, sycl::queue &exec_q, const event_vecT &depends = {}) { return py_boolean_reduction( - src, trailing_dims_to_reduce, dst, std::move(exec_q), depends, + src, trailing_dims_to_reduce, dst, exec_q, depends, any_reduction_axis1_contig_dispatch_vector, any_reduction_axis0_contig_dispatch_vector, any_reduction_strided_dispatch_vector); diff --git a/dpctl/tensor/libtensor/source/boolean_reductions.hpp b/dpctl/tensor/libtensor/source/boolean_reductions.hpp index 1099e21008..09c95bfeae 100644 --- a/dpctl/tensor/libtensor/source/boolean_reductions.hpp +++ b/dpctl/tensor/libtensor/source/boolean_reductions.hpp @@ -52,7 +52,7 @@ std::pair py_boolean_reduction(const dpctl::tensor::usm_ndarray &src, int trailing_dims_to_reduce, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends, const contig_dispatchT &axis1_contig_dispatch_vector, const contig_dispatchT &axis0_contig_dispatch_vector, diff --git a/dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.cpp b/dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.cpp index afd50e16bb..baaf3733a0 100644 --- a/dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.cpp +++ b/dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.cpp @@ -69,7 +69,7 @@ using dpctl::utils::keep_args_alive; std::pair copy_usm_ndarray_into_usm_ndarray(const dpctl::tensor::usm_ndarray &src, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}) { // array dimensions must be the same diff --git a/dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.hpp b/dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.hpp index 
04e7075a68..c2161f1ba6 100644 --- a/dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.hpp +++ b/dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.hpp @@ -40,7 +40,7 @@ namespace py_internal extern std::pair copy_usm_ndarray_into_usm_ndarray(const dpctl::tensor::usm_ndarray &src, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}); extern void init_copy_and_cast_usm_to_usm_dispatch_tables(); diff --git a/dpctl/tensor/libtensor/source/copy_for_reshape.cpp b/dpctl/tensor/libtensor/source/copy_for_reshape.cpp index 4ea0706f87..eb2c45a0cc 100644 --- a/dpctl/tensor/libtensor/source/copy_for_reshape.cpp +++ b/dpctl/tensor/libtensor/source/copy_for_reshape.cpp @@ -60,7 +60,7 @@ static copy_for_reshape_fn_ptr_t std::pair copy_usm_ndarray_for_reshape(const dpctl::tensor::usm_ndarray &src, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends) { py::ssize_t src_nelems = src.get_size(); diff --git a/dpctl/tensor/libtensor/source/copy_for_reshape.hpp b/dpctl/tensor/libtensor/source/copy_for_reshape.hpp index c78fcf1d86..2f25a68480 100644 --- a/dpctl/tensor/libtensor/source/copy_for_reshape.hpp +++ b/dpctl/tensor/libtensor/source/copy_for_reshape.hpp @@ -40,7 +40,7 @@ namespace py_internal extern std::pair copy_usm_ndarray_for_reshape(const dpctl::tensor::usm_ndarray &src, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}); extern void init_copy_for_reshape_dispatch_vectors(); diff --git a/dpctl/tensor/libtensor/source/copy_for_roll.cpp b/dpctl/tensor/libtensor/source/copy_for_roll.cpp index bac8e165dd..cabe904e64 100644 --- a/dpctl/tensor/libtensor/source/copy_for_roll.cpp +++ b/dpctl/tensor/libtensor/source/copy_for_roll.cpp @@ -72,7 +72,7 @@ std::pair copy_usm_ndarray_for_roll_1d(const dpctl::tensor::usm_ndarray &src, const dpctl::tensor::usm_ndarray &dst, py::ssize_t shift, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends) { int src_nd = src.get_ndim(); @@ -254,7 +254,7 @@ std::pair copy_usm_ndarray_for_roll_nd(const dpctl::tensor::usm_ndarray &src, const dpctl::tensor::usm_ndarray &dst, const std::vector &shifts, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends) { int src_nd = src.get_ndim(); diff --git a/dpctl/tensor/libtensor/source/copy_for_roll.hpp b/dpctl/tensor/libtensor/source/copy_for_roll.hpp index 8e8112a6d3..38e84b9c6a 100644 --- a/dpctl/tensor/libtensor/source/copy_for_roll.hpp +++ b/dpctl/tensor/libtensor/source/copy_for_roll.hpp @@ -41,14 +41,14 @@ extern std::pair copy_usm_ndarray_for_roll_1d(const dpctl::tensor::usm_ndarray &src, const dpctl::tensor::usm_ndarray &dst, py::ssize_t shift, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}); extern std::pair copy_usm_ndarray_for_roll_nd(const dpctl::tensor::usm_ndarray &src, const dpctl::tensor::usm_ndarray &dst, const std::vector &shifts, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}); extern void init_copy_for_roll_dispatch_vectors(); diff --git a/dpctl/tensor/libtensor/source/copy_numpy_ndarray_into_usm_ndarray.cpp b/dpctl/tensor/libtensor/source/copy_numpy_ndarray_into_usm_ndarray.cpp index 50e2f9eed0..5616cfa8db 100644 --- a/dpctl/tensor/libtensor/source/copy_numpy_ndarray_into_usm_ndarray.cpp +++ b/dpctl/tensor/libtensor/source/copy_numpy_ndarray_into_usm_ndarray.cpp @@ -56,7 +56,7 @@ static copy_and_cast_from_host_blocking_fn_ptr_t 
void copy_numpy_ndarray_into_usm_ndarray( const py::array &npy_src, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends) { int src_ndim = npy_src.ndim(); diff --git a/dpctl/tensor/libtensor/source/copy_numpy_ndarray_into_usm_ndarray.hpp b/dpctl/tensor/libtensor/source/copy_numpy_ndarray_into_usm_ndarray.hpp index b482bec3eb..3f1833ec99 100644 --- a/dpctl/tensor/libtensor/source/copy_numpy_ndarray_into_usm_ndarray.hpp +++ b/dpctl/tensor/libtensor/source/copy_numpy_ndarray_into_usm_ndarray.hpp @@ -40,7 +40,7 @@ namespace py_internal extern void copy_numpy_ndarray_into_usm_ndarray( const py::array &npy_src, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}); extern void init_copy_numpy_ndarray_into_usm_ndarray_dispatch_tables(void); diff --git a/dpctl/tensor/libtensor/source/elementwise_functions.cpp b/dpctl/tensor/libtensor/source/elementwise_functions.cpp index 32b8cf630d..cca0ac7c0a 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions.cpp @@ -2746,10 +2746,10 @@ void init_elementwise_functions(py::module_ m) using impl::abs_strided_dispatch_vector; auto abs_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, std::move(exec_q), depends, abs_output_typeid_vector, + src, dst, exec_q, depends, abs_output_typeid_vector, abs_contig_dispatch_vector, abs_strided_dispatch_vector); }; m.def("_abs", abs_pyapi, "", py::arg("src"), py::arg("dst"), @@ -2769,10 +2769,10 @@ void init_elementwise_functions(py::module_ m) using impl::acos_strided_dispatch_vector; auto acos_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, std::move(exec_q), depends, acos_output_typeid_vector, + src, dst, exec_q, depends, acos_output_typeid_vector, acos_contig_dispatch_vector, acos_strided_dispatch_vector); }; m.def("_acos", acos_pyapi, "", py::arg("src"), py::arg("dst"), @@ -2792,12 +2792,11 @@ void init_elementwise_functions(py::module_ m) using impl::acosh_strided_dispatch_vector; auto acosh_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const event_vecT &depends = {}) { - return py_unary_ufunc(src, dst, std::move(exec_q), depends, - acosh_output_typeid_vector, - acosh_contig_dispatch_vector, - acosh_strided_dispatch_vector); + return py_unary_ufunc( + src, dst, exec_q, depends, acosh_output_typeid_vector, + acosh_contig_dispatch_vector, acosh_strided_dispatch_vector); }; m.def("_acosh", acosh_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); @@ -2821,11 +2820,10 @@ void init_elementwise_functions(py::module_ m) auto add_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, const dpctl::tensor::usm_ndarray &src2, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, std::move(exec_q), depends, - add_output_id_table, + src1, src2, dst, exec_q, depends, add_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) add_contig_dispatch_table, @@ -2855,10 +2853,10 @@ void init_elementwise_functions(py::module_ m) auto add_inplace_pyapi = [&](const 
dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, + const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_inplace_ufunc( - src, dst, std::move(exec_q), depends, add_output_id_table, + src, dst, exec_q, depends, add_output_id_table, // function pointers to handle inplace operation on // contiguous arrays (pointers may be nullptr) add_inplace_contig_dispatch_table, @@ -2883,10 +2881,10 @@ void init_elementwise_functions(py::module_ m) using impl::asin_strided_dispatch_vector; auto asin_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, std::move(exec_q), depends, asin_output_typeid_vector, + src, dst, exec_q, depends, asin_output_typeid_vector, asin_contig_dispatch_vector, asin_strided_dispatch_vector); }; m.def("_asin", asin_pyapi, "", py::arg("src"), py::arg("dst"), @@ -2906,12 +2904,11 @@ void init_elementwise_functions(py::module_ m) using impl::asinh_strided_dispatch_vector; auto asinh_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const event_vecT &depends = {}) { - return py_unary_ufunc(src, dst, std::move(exec_q), depends, - asinh_output_typeid_vector, - asinh_contig_dispatch_vector, - asinh_strided_dispatch_vector); + return py_unary_ufunc( + src, dst, exec_q, depends, asinh_output_typeid_vector, + asinh_contig_dispatch_vector, asinh_strided_dispatch_vector); }; m.def("_asinh", asinh_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); @@ -2930,7 +2927,7 @@ void init_elementwise_functions(py::module_ m) using impl::atan_output_typeid_vector; using impl::atan_strided_dispatch_vector; - auto atan_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + auto atan_pyapi = [&](arrayT src, arrayT dst, sycl::queue &exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( src, dst, exec_q, depends, atan_output_typeid_vector, @@ -2955,11 +2952,10 @@ void init_elementwise_functions(py::module_ m) auto atan2_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, const dpctl::tensor::usm_ndarray &src2, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, std::move(exec_q), depends, - atan2_output_id_table, + src1, src2, dst, exec_q, depends, atan2_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) atan2_contig_dispatch_table, @@ -2994,12 +2990,11 @@ void init_elementwise_functions(py::module_ m) using impl::atanh_strided_dispatch_vector; auto atanh_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const event_vecT &depends = {}) { - return py_unary_ufunc(src, dst, std::move(exec_q), depends, - atanh_output_typeid_vector, - atanh_contig_dispatch_vector, - atanh_strided_dispatch_vector); + return py_unary_ufunc( + src, dst, exec_q, depends, atanh_output_typeid_vector, + atanh_contig_dispatch_vector, atanh_strided_dispatch_vector); }; m.def("_atanh", atanh_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); @@ -3021,12 +3016,11 @@ void init_elementwise_functions(py::module_ m) auto bitwise_and_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, const dpctl::tensor::usm_ndarray &src2, const dpctl::tensor::usm_ndarray &dst, - 
sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, std::move(exec_q), depends, - bitwise_and_output_id_table, + src1, src2, dst, exec_q, depends, bitwise_and_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) bitwise_and_contig_dispatch_table, @@ -3066,11 +3060,11 @@ void init_elementwise_functions(py::module_ m) &src2, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, std::move(exec_q), depends, + src1, src2, dst, exec_q, depends, bitwise_left_shift_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) @@ -3107,9 +3101,9 @@ void init_elementwise_functions(py::module_ m) using impl::bitwise_invert_strided_dispatch_vector; auto bitwise_invert_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const event_vecT &depends = {}) { - return py_unary_ufunc(src, dst, std::move(exec_q), depends, + return py_unary_ufunc(src, dst, exec_q, depends, bitwise_invert_output_typeid_vector, bitwise_invert_contig_dispatch_vector, bitwise_invert_strided_dispatch_vector); @@ -3135,12 +3129,11 @@ void init_elementwise_functions(py::module_ m) auto bitwise_or_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, const dpctl::tensor::usm_ndarray &src2, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, std::move(exec_q), depends, - bitwise_or_output_id_table, + src1, src2, dst, exec_q, depends, bitwise_or_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) bitwise_or_contig_dispatch_table, @@ -3180,11 +3173,11 @@ void init_elementwise_functions(py::module_ m) &src2, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, std::move(exec_q), depends, + src1, src2, dst, exec_q, depends, bitwise_right_shift_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) @@ -3223,12 +3216,11 @@ void init_elementwise_functions(py::module_ m) auto bitwise_xor_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, const dpctl::tensor::usm_ndarray &src2, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, std::move(exec_q), depends, - bitwise_xor_output_id_table, + src1, src2, dst, exec_q, depends, bitwise_xor_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) bitwise_xor_contig_dispatch_table, @@ -3263,10 +3255,10 @@ void init_elementwise_functions(py::module_ m) using impl::ceil_strided_dispatch_vector; auto ceil_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, std::move(exec_q), depends, ceil_output_typeid_vector, + src, dst, exec_q, depends, ceil_output_typeid_vector, ceil_contig_dispatch_vector, ceil_strided_dispatch_vector); }; m.def("_ceil", ceil_pyapi, "", py::arg("src"), py::arg("dst"), @@ -3286,10 +3278,10 @@ void init_elementwise_functions(py::module_ m) using impl::conj_strided_dispatch_vector; 
auto conj_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, std::move(exec_q), depends, conj_output_typeid_vector, + src, dst, exec_q, depends, conj_output_typeid_vector, conj_contig_dispatch_vector, conj_strided_dispatch_vector); }; m.def("_conj", conj_pyapi, "", py::arg("src"), py::arg("dst"), @@ -3309,10 +3301,10 @@ void init_elementwise_functions(py::module_ m) using impl::cos_strided_dispatch_vector; auto cos_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, std::move(exec_q), depends, cos_output_typeid_vector, + src, dst, exec_q, depends, cos_output_typeid_vector, cos_contig_dispatch_vector, cos_strided_dispatch_vector); }; m.def("_cos", cos_pyapi, "", py::arg("src"), py::arg("dst"), @@ -3332,10 +3324,10 @@ void init_elementwise_functions(py::module_ m) using impl::cosh_strided_dispatch_vector; auto cosh_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, std::move(exec_q), depends, cosh_output_typeid_vector, + src, dst, exec_q, depends, cosh_output_typeid_vector, cosh_contig_dispatch_vector, cosh_strided_dispatch_vector); }; m.def("_cosh", cosh_pyapi, "", py::arg("src"), py::arg("dst"), @@ -3361,11 +3353,10 @@ void init_elementwise_functions(py::module_ m) auto divide_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, const dpctl::tensor::usm_ndarray &src2, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, std::move(exec_q), depends, - true_divide_output_id_table, + src1, src2, dst, exec_q, depends, true_divide_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) true_divide_contig_dispatch_table, @@ -3400,11 +3391,10 @@ void init_elementwise_functions(py::module_ m) auto equal_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, const dpctl::tensor::usm_ndarray &src2, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, std::move(exec_q), depends, - equal_output_id_table, + src1, src2, dst, exec_q, depends, equal_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) equal_contig_dispatch_table, @@ -3439,10 +3429,10 @@ void init_elementwise_functions(py::module_ m) using impl::exp_strided_dispatch_vector; auto exp_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, std::move(exec_q), depends, exp_output_typeid_vector, + src, dst, exec_q, depends, exp_output_typeid_vector, exp_contig_dispatch_vector, exp_strided_dispatch_vector); }; m.def("_exp", exp_pyapi, "", py::arg("src"), py::arg("dst"), @@ -3462,12 +3452,11 @@ void init_elementwise_functions(py::module_ m) using impl::expm1_strided_dispatch_vector; auto expm1_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const event_vecT &depends = {}) { - return py_unary_ufunc(src, dst, std::move(exec_q), depends, - expm1_output_typeid_vector, - expm1_contig_dispatch_vector, - expm1_strided_dispatch_vector); + return 
py_unary_ufunc( + src, dst, exec_q, depends, expm1_output_typeid_vector, + expm1_contig_dispatch_vector, expm1_strided_dispatch_vector); }; m.def("_expm1", expm1_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); @@ -3487,12 +3476,11 @@ void init_elementwise_functions(py::module_ m) using impl::floor_strided_dispatch_vector; auto floor_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const event_vecT &depends = {}) { - return py_unary_ufunc(src, dst, std::move(exec_q), depends, - floor_output_typeid_vector, - floor_contig_dispatch_vector, - floor_strided_dispatch_vector); + return py_unary_ufunc( + src, dst, exec_q, depends, floor_output_typeid_vector, + floor_contig_dispatch_vector, floor_strided_dispatch_vector); }; m.def("_floor", floor_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); @@ -3514,12 +3502,11 @@ void init_elementwise_functions(py::module_ m) auto floor_divide_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, const dpctl::tensor::usm_ndarray &src2, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, std::move(exec_q), depends, - floor_divide_output_id_table, + src1, src2, dst, exec_q, depends, floor_divide_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) floor_divide_contig_dispatch_table, @@ -3556,11 +3543,10 @@ void init_elementwise_functions(py::module_ m) auto greater_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, const dpctl::tensor::usm_ndarray &src2, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, std::move(exec_q), depends, - greater_output_id_table, + src1, src2, dst, exec_q, depends, greater_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) greater_contig_dispatch_table, @@ -3597,12 +3583,11 @@ void init_elementwise_functions(py::module_ m) auto greater_equal_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, const dpctl::tensor::usm_ndarray &src2, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, std::move(exec_q), depends, - greater_equal_output_id_table, + src1, src2, dst, exec_q, depends, greater_equal_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) greater_equal_contig_dispatch_table, @@ -3638,10 +3623,10 @@ void init_elementwise_functions(py::module_ m) using impl::imag_strided_dispatch_vector; auto imag_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, std::move(exec_q), depends, imag_output_typeid_vector, + src, dst, exec_q, depends, imag_output_typeid_vector, imag_contig_dispatch_vector, imag_strided_dispatch_vector); }; m.def("_imag", imag_pyapi, "", py::arg("src"), py::arg("dst"), @@ -3662,9 +3647,9 @@ void init_elementwise_functions(py::module_ m) using impl::isfinite_strided_dispatch_vector; auto isfinite_pyapi = [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, + const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, const 
std::vector &depends = {}) { - return py_unary_ufunc(src, dst, std::move(exec_q), depends, + return py_unary_ufunc(src, dst, exec_q, depends, isfinite_output_typeid_vector, isfinite_contig_dispatch_vector, isfinite_strided_dispatch_vector); @@ -3687,12 +3672,11 @@ void init_elementwise_functions(py::module_ m) using impl::isinf_strided_dispatch_vector; auto isinf_pyapi = [&](const dpctl::tensor::usm_ndarray &src, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}) { - return py_unary_ufunc(src, dst, std::move(exec_q), depends, - isinf_output_typeid_vector, - isinf_contig_dispatch_vector, - isinf_strided_dispatch_vector); + return py_unary_ufunc( + src, dst, exec_q, depends, isinf_output_typeid_vector, + isinf_contig_dispatch_vector, isinf_strided_dispatch_vector); }; auto isinf_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, @@ -3712,12 +3696,11 @@ void init_elementwise_functions(py::module_ m) using impl::isnan_strided_dispatch_vector; auto isnan_pyapi = [&](const dpctl::tensor::usm_ndarray &src, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}) { - return py_unary_ufunc(src, dst, std::move(exec_q), depends, - isnan_output_typeid_vector, - isnan_contig_dispatch_vector, - isnan_strided_dispatch_vector); + return py_unary_ufunc( + src, dst, exec_q, depends, isnan_output_typeid_vector, + isnan_contig_dispatch_vector, isnan_strided_dispatch_vector); }; auto isnan_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, @@ -3738,11 +3721,10 @@ void init_elementwise_functions(py::module_ m) auto less_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, const dpctl::tensor::usm_ndarray &src2, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, std::move(exec_q), depends, - less_output_id_table, + src1, src2, dst, exec_q, depends, less_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) less_contig_dispatch_table, @@ -3779,12 +3761,11 @@ void init_elementwise_functions(py::module_ m) auto less_equal_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, const dpctl::tensor::usm_ndarray &src2, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, std::move(exec_q), depends, - less_equal_output_id_table, + src1, src2, dst, exec_q, depends, less_equal_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) less_equal_contig_dispatch_table, @@ -3819,10 +3800,10 @@ void init_elementwise_functions(py::module_ m) using impl::log_strided_dispatch_vector; auto log_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, std::move(exec_q), depends, log_output_typeid_vector, + src, dst, exec_q, depends, log_output_typeid_vector, log_contig_dispatch_vector, log_strided_dispatch_vector); }; m.def("_log", log_pyapi, "", py::arg("src"), py::arg("dst"), @@ -3842,12 +3823,11 @@ void init_elementwise_functions(py::module_ m) using impl::log1p_strided_dispatch_vector; auto log1p_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const event_vecT 
&depends = {}) { - return py_unary_ufunc(src, dst, std::move(exec_q), depends, - log1p_output_typeid_vector, - log1p_contig_dispatch_vector, - log1p_strided_dispatch_vector); + return py_unary_ufunc( + src, dst, exec_q, depends, log1p_output_typeid_vector, + log1p_contig_dispatch_vector, log1p_strided_dispatch_vector); }; m.def("_log1p", log1p_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); @@ -3868,10 +3848,10 @@ void init_elementwise_functions(py::module_ m) using impl::log2_strided_dispatch_vector; auto log2_pyapi = [&](const dpctl::tensor::usm_ndarray &src, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}) { return py_unary_ufunc( - src, dst, std::move(exec_q), depends, log2_output_typeid_vector, + src, dst, exec_q, depends, log2_output_typeid_vector, log2_contig_dispatch_vector, log2_strided_dispatch_vector); }; auto log2_result_type_pyapi = [&](const py::dtype &dtype) { @@ -3891,12 +3871,11 @@ void init_elementwise_functions(py::module_ m) using impl::log10_strided_dispatch_vector; auto log10_pyapi = [&](const dpctl::tensor::usm_ndarray &src, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}) { - return py_unary_ufunc(src, dst, std::move(exec_q), depends, - log10_output_typeid_vector, - log10_contig_dispatch_vector, - log10_strided_dispatch_vector); + return py_unary_ufunc( + src, dst, exec_q, depends, log10_output_typeid_vector, + log10_contig_dispatch_vector, log10_strided_dispatch_vector); }; auto log10_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, @@ -3917,12 +3896,11 @@ void init_elementwise_functions(py::module_ m) auto logaddexp_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, const dpctl::tensor::usm_ndarray &src2, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, std::move(exec_q), depends, - logaddexp_output_id_table, + src1, src2, dst, exec_q, depends, logaddexp_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) logaddexp_contig_dispatch_table, @@ -3959,12 +3937,11 @@ void init_elementwise_functions(py::module_ m) auto logical_and_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, const dpctl::tensor::usm_ndarray &src2, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, std::move(exec_q), depends, - logical_and_output_id_table, + src1, src2, dst, exec_q, depends, logical_and_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) logical_and_contig_dispatch_table, @@ -3999,9 +3976,9 @@ void init_elementwise_functions(py::module_ m) using impl::logical_not_strided_dispatch_vector; auto logical_not_pyapi = [&](const arrayT &src, arrayT dst, - sycl::queue exec_q, + sycl::queue &exec_q, const event_vecT &depends = {}) { - return py_unary_ufunc(src, dst, std::move(exec_q), depends, + return py_unary_ufunc(src, dst, exec_q, depends, logical_not_output_typeid_vector, logical_not_contig_dispatch_vector, logical_not_strided_dispatch_vector); @@ -4027,12 +4004,11 @@ void init_elementwise_functions(py::module_ m) auto logical_or_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, const dpctl::tensor::usm_ndarray &src2, const 
dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, std::move(exec_q), depends, - logical_or_output_id_table, + src1, src2, dst, exec_q, depends, logical_or_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) logical_or_contig_dispatch_table, @@ -4069,12 +4045,11 @@ void init_elementwise_functions(py::module_ m) auto logical_xor_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, const dpctl::tensor::usm_ndarray &src2, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, std::move(exec_q), depends, - logical_xor_output_id_table, + src1, src2, dst, exec_q, depends, logical_xor_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) logical_xor_contig_dispatch_table, @@ -4111,11 +4086,10 @@ void init_elementwise_functions(py::module_ m) auto maximum_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, const dpctl::tensor::usm_ndarray &src2, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, std::move(exec_q), depends, - maximum_output_id_table, + src1, src2, dst, exec_q, depends, maximum_output_id_table, // function pointers to handle operation on contiguous // arrays (pointers may be nullptr) maximum_contig_dispatch_table, @@ -4152,11 +4126,10 @@ void init_elementwise_functions(py::module_ m) auto minimum_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, const dpctl::tensor::usm_ndarray &src2, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, std::move(exec_q), depends, - minimum_output_id_table, + src1, src2, dst, exec_q, depends, minimum_output_id_table, // function pointers to handle operation on contiguous // arrays (pointers may be nullptr) minimum_contig_dispatch_table, @@ -4195,11 +4168,10 @@ void init_elementwise_functions(py::module_ m) auto multiply_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, + const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, std::move(exec_q), depends, - multiply_output_id_table, + src1, src2, dst, exec_q, depends, multiply_output_id_table, // function pointers to handle operation on contiguous // arrays (pointers may be nullptr) multiply_contig_dispatch_table, @@ -4229,11 +4201,10 @@ void init_elementwise_functions(py::module_ m) auto multiply_inplace_pyapi = [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, + const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_inplace_ufunc( - src, dst, std::move(exec_q), depends, - multiply_output_id_table, + src, dst, exec_q, depends, multiply_output_id_table, // function pointers to handle inplace operation on // contiguous arrays (pointers may be nullptr) multiply_inplace_contig_dispatch_table, @@ -4258,9 +4229,9 @@ void init_elementwise_functions(py::module_ m) using impl::negative_strided_dispatch_vector; auto negative_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue 
exec_q, + sycl::queue &exec_q, const event_vecT &depends = {}) { - return py_unary_ufunc(src, dst, std::move(exec_q), depends, + return py_unary_ufunc(src, dst, exec_q, depends, negative_output_typeid_vector, negative_contig_dispatch_vector, negative_strided_dispatch_vector); @@ -4285,12 +4256,11 @@ void init_elementwise_functions(py::module_ m) auto not_equal_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, const dpctl::tensor::usm_ndarray &src2, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, std::move(exec_q), depends, - not_equal_output_id_table, + src1, src2, dst, exec_q, depends, not_equal_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) not_equal_contig_dispatch_table, @@ -4325,9 +4295,9 @@ void init_elementwise_functions(py::module_ m) using impl::positive_strided_dispatch_vector; auto positive_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const event_vecT &depends = {}) { - return py_unary_ufunc(src, dst, std::move(exec_q), depends, + return py_unary_ufunc(src, dst, exec_q, depends, positive_output_typeid_vector, positive_contig_dispatch_vector, positive_strided_dispatch_vector); @@ -4352,11 +4322,10 @@ void init_elementwise_functions(py::module_ m) auto pow_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, const dpctl::tensor::usm_ndarray &src2, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, std::move(exec_q), depends, - pow_output_id_table, + src1, src2, dst, exec_q, depends, pow_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) pow_contig_dispatch_table, @@ -4391,10 +4360,10 @@ void init_elementwise_functions(py::module_ m) using impl::proj_strided_dispatch_vector; auto proj_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, std::move(exec_q), depends, proj_output_typeid_vector, + src, dst, exec_q, depends, proj_output_typeid_vector, proj_contig_dispatch_vector, proj_strided_dispatch_vector); }; m.def("_proj", proj_pyapi, "", py::arg("src"), py::arg("dst"), @@ -4414,10 +4383,10 @@ void init_elementwise_functions(py::module_ m) using impl::real_strided_dispatch_vector; auto real_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, std::move(exec_q), depends, real_output_typeid_vector, + src, dst, exec_q, depends, real_output_typeid_vector, real_contig_dispatch_vector, real_strided_dispatch_vector); }; m.def("_real", real_pyapi, "", py::arg("src"), py::arg("dst"), @@ -4439,12 +4408,11 @@ void init_elementwise_functions(py::module_ m) auto remainder_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, const dpctl::tensor::usm_ndarray &src2, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, std::move(exec_q), depends, - remainder_output_id_table, + src1, src2, dst, exec_q, depends, remainder_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) remainder_contig_dispatch_table, @@ -4479,12 +4447,11 @@ void 
init_elementwise_functions(py::module_ m) using impl::round_strided_dispatch_vector; auto round_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const event_vecT &depends = {}) { - return py_unary_ufunc(src, dst, std::move(exec_q), depends, - round_output_typeid_vector, - round_contig_dispatch_vector, - round_strided_dispatch_vector); + return py_unary_ufunc( + src, dst, exec_q, depends, round_output_typeid_vector, + round_contig_dispatch_vector, round_strided_dispatch_vector); }; m.def("_round", round_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); @@ -4504,10 +4471,10 @@ void init_elementwise_functions(py::module_ m) using impl::sign_strided_dispatch_vector; auto sign_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, std::move(exec_q), depends, sign_output_typeid_vector, + src, dst, exec_q, depends, sign_output_typeid_vector, sign_contig_dispatch_vector, sign_strided_dispatch_vector); }; m.def("_sign", sign_pyapi, "", py::arg("src"), py::arg("dst"), @@ -4527,9 +4494,9 @@ void init_elementwise_functions(py::module_ m) using impl::signbit_strided_dispatch_vector; auto signbit_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const event_vecT &depends = {}) { - return py_unary_ufunc(src, dst, std::move(exec_q), depends, + return py_unary_ufunc(src, dst, exec_q, depends, signbit_output_typeid_vector, signbit_contig_dispatch_vector, signbit_strided_dispatch_vector); @@ -4552,10 +4519,10 @@ void init_elementwise_functions(py::module_ m) using impl::sin_strided_dispatch_vector; auto sin_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, std::move(exec_q), depends, sin_output_typeid_vector, + src, dst, exec_q, depends, sin_output_typeid_vector, sin_contig_dispatch_vector, sin_strided_dispatch_vector); }; m.def("_sin", sin_pyapi, "", py::arg("src"), py::arg("dst"), @@ -4574,10 +4541,10 @@ void init_elementwise_functions(py::module_ m) using impl::sinh_strided_dispatch_vector; auto sinh_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, std::move(exec_q), depends, sinh_output_typeid_vector, + src, dst, exec_q, depends, sinh_output_typeid_vector, sinh_contig_dispatch_vector, sinh_strided_dispatch_vector); }; m.def("_sinh", sinh_pyapi, "", py::arg("src"), py::arg("dst"), @@ -4597,12 +4564,11 @@ void init_elementwise_functions(py::module_ m) using impl::square_strided_dispatch_vector; auto square_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const event_vecT &depends = {}) { - return py_unary_ufunc(src, dst, std::move(exec_q), depends, - square_output_typeid_vector, - square_contig_dispatch_vector, - square_strided_dispatch_vector); + return py_unary_ufunc( + src, dst, exec_q, depends, square_output_typeid_vector, + square_contig_dispatch_vector, square_strided_dispatch_vector); }; m.def("_square", square_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); @@ -4622,10 +4588,10 @@ void init_elementwise_functions(py::module_ m) using impl::sqrt_strided_dispatch_vector; auto sqrt_pyapi = [&](const arrayT &src, const arrayT &dst, - 
sycl::queue exec_q, + sycl::queue &exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, std::move(exec_q), depends, sqrt_output_typeid_vector, + src, dst, exec_q, depends, sqrt_output_typeid_vector, sqrt_contig_dispatch_vector, sqrt_strided_dispatch_vector); }; m.def("_sqrt", sqrt_pyapi, "", py::arg("src"), py::arg("dst"), @@ -4649,11 +4615,10 @@ void init_elementwise_functions(py::module_ m) auto subtract_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, + const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, std::move(exec_q), depends, - subtract_output_id_table, + src1, src2, dst, exec_q, depends, subtract_output_id_table, // function pointers to handle operation on contiguous // arrays (pointers may be nullptr) subtract_contig_dispatch_table, @@ -4683,11 +4648,10 @@ void init_elementwise_functions(py::module_ m) auto subtract_inplace_pyapi = [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, sycl::queue exec_q, + const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_inplace_ufunc( - src, dst, std::move(exec_q), depends, - subtract_output_id_table, + src, dst, exec_q, depends, subtract_output_id_table, // function pointers to handle inplace operation on // contiguous arrays (pointers may be nullptr) subtract_inplace_contig_dispatch_table, @@ -4712,10 +4676,10 @@ void init_elementwise_functions(py::module_ m) using impl::tan_strided_dispatch_vector; auto tan_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, std::move(exec_q), depends, tan_output_typeid_vector, + src, dst, exec_q, depends, tan_output_typeid_vector, tan_contig_dispatch_vector, tan_strided_dispatch_vector); }; m.def("_tan", tan_pyapi, "", py::arg("src"), py::arg("dst"), @@ -4735,10 +4699,10 @@ void init_elementwise_functions(py::module_ m) using impl::tanh_strided_dispatch_vector; auto tanh_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, std::move(exec_q), depends, tanh_output_typeid_vector, + src, dst, exec_q, depends, tanh_output_typeid_vector, tanh_contig_dispatch_vector, tanh_strided_dispatch_vector); }; m.def("_tanh", tanh_pyapi, "", py::arg("src"), py::arg("dst"), @@ -4758,12 +4722,11 @@ void init_elementwise_functions(py::module_ m) using impl::trunc_strided_dispatch_vector; auto trunc_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const event_vecT &depends = {}) { - return py_unary_ufunc(src, dst, std::move(exec_q), depends, - trunc_output_typeid_vector, - trunc_contig_dispatch_vector, - trunc_strided_dispatch_vector); + return py_unary_ufunc( + src, dst, exec_q, depends, trunc_output_typeid_vector, + trunc_contig_dispatch_vector, trunc_strided_dispatch_vector); }; m.def("_trunc", trunc_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); @@ -4785,11 +4748,10 @@ void init_elementwise_functions(py::module_ m) auto hypot_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, const dpctl::tensor::usm_ndarray &src2, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const 
std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, std::move(exec_q), depends, - hypot_output_id_table, + src1, src2, dst, exec_q, depends, hypot_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) hypot_contig_dispatch_table, diff --git a/dpctl/tensor/libtensor/source/eye_ctor.cpp b/dpctl/tensor/libtensor/source/eye_ctor.cpp index 85e28cf87f..5d7657d047 100644 --- a/dpctl/tensor/libtensor/source/eye_ctor.cpp +++ b/dpctl/tensor/libtensor/source/eye_ctor.cpp @@ -51,7 +51,7 @@ static eye_fn_ptr_t eye_dispatch_vector[td_ns::num_types]; std::pair usm_ndarray_eye(py::ssize_t k, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends) { // dst must be 2D diff --git a/dpctl/tensor/libtensor/source/eye_ctor.hpp b/dpctl/tensor/libtensor/source/eye_ctor.hpp index bb02ea1075..4307e0f3b2 100644 --- a/dpctl/tensor/libtensor/source/eye_ctor.hpp +++ b/dpctl/tensor/libtensor/source/eye_ctor.hpp @@ -40,7 +40,7 @@ namespace py_internal extern std::pair usm_ndarray_eye(py::ssize_t k, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}); extern void init_eye_ctor_dispatch_vectors(void); diff --git a/dpctl/tensor/libtensor/source/full_ctor.cpp b/dpctl/tensor/libtensor/source/full_ctor.cpp index cbf1736701..085bdcaf2a 100644 --- a/dpctl/tensor/libtensor/source/full_ctor.cpp +++ b/dpctl/tensor/libtensor/source/full_ctor.cpp @@ -55,7 +55,7 @@ static full_contig_fn_ptr_t full_contig_dispatch_vector[td_ns::num_types]; std::pair usm_ndarray_full(const py::object &py_value, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends) { // start, end should be coercible into data type of dst diff --git a/dpctl/tensor/libtensor/source/full_ctor.hpp b/dpctl/tensor/libtensor/source/full_ctor.hpp index dd45ee9b60..3894babf1f 100644 --- a/dpctl/tensor/libtensor/source/full_ctor.hpp +++ b/dpctl/tensor/libtensor/source/full_ctor.hpp @@ -40,7 +40,7 @@ namespace py_internal extern std::pair usm_ndarray_full(const py::object &py_value, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}); extern void init_full_ctor_dispatch_vectors(void); diff --git a/dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp b/dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp index a817ac14d7..2cb86bbee0 100644 --- a/dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp +++ b/dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp @@ -67,7 +67,7 @@ namespace py = pybind11; using dpctl::utils::keep_args_alive; std::vector -_populate_kernel_params(sycl::queue exec_q, +_populate_kernel_params(sycl::queue &exec_q, std::vector &host_task_events, char **device_ind_ptrs, py::ssize_t *device_ind_sh_st, @@ -238,7 +238,7 @@ usm_ndarray_take(const dpctl::tensor::usm_ndarray &src, const dpctl::tensor::usm_ndarray &dst, int axis_start, uint8_t mode, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends) { std::vector ind = parse_py_ind(exec_q, py_ind); @@ -549,7 +549,7 @@ usm_ndarray_put(const dpctl::tensor::usm_ndarray &dst, const dpctl::tensor::usm_ndarray &val, int axis_start, uint8_t mode, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends) { std::vector ind = parse_py_ind(exec_q, py_ind); diff --git a/dpctl/tensor/libtensor/source/integer_advanced_indexing.hpp 
b/dpctl/tensor/libtensor/source/integer_advanced_indexing.hpp index f2ce1b1d14..f845f7d23b 100644 --- a/dpctl/tensor/libtensor/source/integer_advanced_indexing.hpp +++ b/dpctl/tensor/libtensor/source/integer_advanced_indexing.hpp @@ -44,7 +44,7 @@ usm_ndarray_take(const dpctl::tensor::usm_ndarray &, const dpctl::tensor::usm_ndarray &, int, uint8_t, - sycl::queue, + sycl::queue &, const std::vector & = {}); extern std::pair @@ -53,7 +53,7 @@ usm_ndarray_put(const dpctl::tensor::usm_ndarray &, const dpctl::tensor::usm_ndarray &, int, uint8_t, - sycl::queue, + sycl::queue &, const std::vector & = {}); extern void init_advanced_indexing_dispatch_tables(void); diff --git a/dpctl/tensor/libtensor/source/linear_sequences.cpp b/dpctl/tensor/libtensor/source/linear_sequences.cpp index f933161900..34db93de12 100644 --- a/dpctl/tensor/libtensor/source/linear_sequences.cpp +++ b/dpctl/tensor/libtensor/source/linear_sequences.cpp @@ -61,7 +61,7 @@ std::pair usm_ndarray_linear_sequence_step(const py::object &start, const py::object &dt, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends) { // dst must be 1D and C-contiguous @@ -109,7 +109,7 @@ usm_ndarray_linear_sequence_affine(const py::object &start, const py::object &end, const dpctl::tensor::usm_ndarray &dst, bool include_endpoint, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends) { // dst must be 1D and C-contiguous diff --git a/dpctl/tensor/libtensor/source/linear_sequences.hpp b/dpctl/tensor/libtensor/source/linear_sequences.hpp index f51fe266b1..61e613b45f 100644 --- a/dpctl/tensor/libtensor/source/linear_sequences.hpp +++ b/dpctl/tensor/libtensor/source/linear_sequences.hpp @@ -41,7 +41,7 @@ extern std::pair usm_ndarray_linear_sequence_step(const py::object &start, const py::object &dt, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}); extern std::pair usm_ndarray_linear_sequence_affine( @@ -49,7 +49,7 @@ extern std::pair usm_ndarray_linear_sequence_affine( const py::object &end, const dpctl::tensor::usm_ndarray &dst, bool include_endpoint, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends = {}); extern void init_linear_sequences_dispatch_vectors(void); diff --git a/dpctl/tensor/libtensor/source/repeat.cpp b/dpctl/tensor/libtensor/source/repeat.cpp index f9f7c599e1..6402347235 100644 --- a/dpctl/tensor/libtensor/source/repeat.cpp +++ b/dpctl/tensor/libtensor/source/repeat.cpp @@ -95,7 +95,7 @@ py_repeat_by_sequence(const dpctl::tensor::usm_ndarray &src, const dpctl::tensor::usm_ndarray &reps, const dpctl::tensor::usm_ndarray &cumsum, int axis, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends) { int src_nd = src.get_ndim(); @@ -348,7 +348,7 @@ py_repeat_by_scalar(const dpctl::tensor::usm_ndarray &src, const dpctl::tensor::usm_ndarray &dst, const py::ssize_t reps, int axis, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends) { int src_nd = src.get_ndim(); diff --git a/dpctl/tensor/libtensor/source/repeat.hpp b/dpctl/tensor/libtensor/source/repeat.hpp index 7d3e8da2d9..87fb0a0847 100644 --- a/dpctl/tensor/libtensor/source/repeat.hpp +++ b/dpctl/tensor/libtensor/source/repeat.hpp @@ -45,7 +45,7 @@ py_repeat_by_sequence(const dpctl::tensor::usm_ndarray &src, const dpctl::tensor::usm_ndarray &reps, const dpctl::tensor::usm_ndarray &cumsum, int axis, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends); extern 
std::pair @@ -53,7 +53,7 @@ py_repeat_by_scalar(const dpctl::tensor::usm_ndarray &src, const dpctl::tensor::usm_ndarray &dst, const py::ssize_t reps, int axis, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends); } // namespace py_internal diff --git a/dpctl/tensor/libtensor/source/sum_reductions.cpp b/dpctl/tensor/libtensor/source/sum_reductions.cpp index d82b6f256a..9a1865d1bb 100644 --- a/dpctl/tensor/libtensor/source/sum_reductions.cpp +++ b/dpctl/tensor/libtensor/source/sum_reductions.cpp @@ -98,7 +98,7 @@ std::pair py_sum_over_axis( const dpctl::tensor::usm_ndarray &src, int trailing_dims_to_reduce, // sum over this many trailing indexes const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends) { int src_nd = src.get_ndim(); @@ -422,7 +422,7 @@ std::pair py_sum_over_axis( bool py_sum_over_axis_dtype_supported(const py::dtype &input_dtype, const py::dtype &output_dtype, const std::string &dst_usm_type, - sycl::queue q) + sycl::queue &q) { int arg_tn = input_dtype.num(); // NumPy type numbers are the same as in dpctl diff --git a/dpctl/tensor/libtensor/source/triul_ctor.cpp b/dpctl/tensor/libtensor/source/triul_ctor.cpp index 74c226823c..a245341c38 100644 --- a/dpctl/tensor/libtensor/source/triul_ctor.cpp +++ b/dpctl/tensor/libtensor/source/triul_ctor.cpp @@ -52,7 +52,7 @@ static tri_fn_ptr_t tril_generic_dispatch_vector[td_ns::num_types]; static tri_fn_ptr_t triu_generic_dispatch_vector[td_ns::num_types]; std::pair -usm_ndarray_triul(sycl::queue exec_q, +usm_ndarray_triul(sycl::queue &exec_q, const dpctl::tensor::usm_ndarray &src, const dpctl::tensor::usm_ndarray &dst, char part, diff --git a/dpctl/tensor/libtensor/source/triul_ctor.hpp b/dpctl/tensor/libtensor/source/triul_ctor.hpp index a2e6434dc8..9e7053c638 100644 --- a/dpctl/tensor/libtensor/source/triul_ctor.hpp +++ b/dpctl/tensor/libtensor/source/triul_ctor.hpp @@ -38,7 +38,7 @@ namespace py_internal { extern std::pair -usm_ndarray_triul(sycl::queue exec_q, +usm_ndarray_triul(sycl::queue &exec_q, const dpctl::tensor::usm_ndarray &src, const dpctl::tensor::usm_ndarray &dst, char part, diff --git a/dpctl/tensor/libtensor/source/where.cpp b/dpctl/tensor/libtensor/source/where.cpp index 385f9b9428..9fe2be12ed 100644 --- a/dpctl/tensor/libtensor/source/where.cpp +++ b/dpctl/tensor/libtensor/source/where.cpp @@ -63,7 +63,7 @@ py_where(const dpctl::tensor::usm_ndarray &condition, const dpctl::tensor::usm_ndarray &x1, const dpctl::tensor::usm_ndarray &x2, const dpctl::tensor::usm_ndarray &dst, - sycl::queue exec_q, + sycl::queue &exec_q, const std::vector &depends) { diff --git a/dpctl/tensor/libtensor/source/where.hpp b/dpctl/tensor/libtensor/source/where.hpp index 4a0fe7fa6d..6fe6527080 100644 --- a/dpctl/tensor/libtensor/source/where.hpp +++ b/dpctl/tensor/libtensor/source/where.hpp @@ -42,7 +42,7 @@ py_where(const dpctl::tensor::usm_ndarray &, const dpctl::tensor::usm_ndarray &, const dpctl::tensor::usm_ndarray &, const dpctl::tensor::usm_ndarray &, - sycl::queue, + sycl::queue &, const std::vector &); extern void init_where_dispatch_tables(void); From 17f62591d1d6f42a7bee487eccfc8745aa18af86 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 26 Sep 2023 05:32:09 -0500 Subject: [PATCH 4/9] Fixed coverity issues COPY_INSTEAD_OF_MOVE in libsyclinterface --- .../source/dpctl_sycl_context_interface.cpp | 6 +++-- .../source/dpctl_sycl_device_interface.cpp | 3 ++- .../source/dpctl_sycl_device_manager.cpp | 3 ++- .../dpctl_sycl_kernel_bundle_interface.cpp | 
12 ++++++---- .../source/dpctl_sycl_platform_interface.cpp | 3 ++- .../source/dpctl_sycl_queue_interface.cpp | 23 ++++++++++--------- .../source/dpctl_sycl_usm_interface.cpp | 3 ++- 7 files changed, 31 insertions(+), 22 deletions(-) diff --git a/libsyclinterface/source/dpctl_sycl_context_interface.cpp b/libsyclinterface/source/dpctl_sycl_context_interface.cpp index d93ca4e992..a19286a779 100644 --- a/libsyclinterface/source/dpctl_sycl_context_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_context_interface.cpp @@ -29,6 +29,7 @@ #include "dpctl_error_handlers.h" #include "dpctl_sycl_type_casters.hpp" #include +#include #include using namespace sycl; @@ -86,7 +87,7 @@ DPCTLContext_CreateFromDevices(__dpctl_keep const DPCTLDeviceVectorRef DVRef, try { CRef = wrap( - new context(Devices, DPCTL_AsyncErrorHandler(handler))); + new context(std::move(Devices), DPCTL_AsyncErrorHandler(handler))); } catch (std::exception const &e) { error_handler(e, __FILE__, __func__, __LINE__); } @@ -146,7 +147,8 @@ DPCTLContext_GetDevices(__dpctl_keep const DPCTLSyclContextRef CRef) auto Devices = Context->get_devices(); DevicesVectorPtr->reserve(Devices.size()); for (const auto &Dev : Devices) { - DevicesVectorPtr->emplace_back(wrap(new device(Dev))); + DevicesVectorPtr->emplace_back( + wrap(new device(std::move(Dev)))); } return wrap(DevicesVectorPtr); } catch (std::exception const &e) { diff --git a/libsyclinterface/source/dpctl_sycl_device_interface.cpp b/libsyclinterface/source/dpctl_sycl_device_interface.cpp index 73dfcc7e11..b5a97013c2 100644 --- a/libsyclinterface/source/dpctl_sycl_device_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_device_interface.cpp @@ -35,6 +35,7 @@ #include /* SYCL headers */ #include #include +#include #include using namespace sycl; @@ -543,7 +544,7 @@ DPCTLDevice_GetParentDevice(__dpctl_keep const DPCTLSyclDeviceRef DRef) auto D = unwrap(DRef); if (D) { try { - auto parent_D = D->get_info(); + const auto &parent_D = D->get_info(); return wrap(new device(parent_D)); } catch (std::exception const &e) { error_handler(e, __FILE__, __func__, __LINE__); diff --git a/libsyclinterface/source/dpctl_sycl_device_manager.cpp b/libsyclinterface/source/dpctl_sycl_device_manager.cpp index 014299721b..0eb71df412 100644 --- a/libsyclinterface/source/dpctl_sycl_device_manager.cpp +++ b/libsyclinterface/source/dpctl_sycl_device_manager.cpp @@ -34,6 +34,7 @@ #include #include #include +#include #include using namespace sycl; @@ -188,7 +189,7 @@ DPCTLDeviceMgr_GetCachedContext(__dpctl_keep const DPCTLSyclDeviceRef DRef) if (entry != cache.end()) { context *ContextPtr = nullptr; try { - ContextPtr = new context(entry->second); + ContextPtr = new context(std::move(entry->second)); CRef = wrap(ContextPtr); } catch (std::exception const &e) { error_handler(e, __FILE__, __func__, __LINE__); diff --git a/libsyclinterface/source/dpctl_sycl_kernel_bundle_interface.cpp b/libsyclinterface/source/dpctl_sycl_kernel_bundle_interface.cpp index f7e059f8dc..201c8172e3 100644 --- a/libsyclinterface/source/dpctl_sycl_kernel_bundle_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_kernel_bundle_interface.cpp @@ -38,6 +38,7 @@ #include #endif #include +#include #ifdef DPCTL_ENABLE_L0_PROGRAM_CREATION // Note: include ze_api.h before level_zero.hpp. 
Make sure clang-format does @@ -202,7 +203,7 @@ _CreateKernelBundle_common_ocl_impl(cl_program clProgram, } using ekbTy = kernel_bundle; - ekbTy kb = + const ekbTy &kb = make_kernel_bundle(clProgram, ctx); return wrap(new ekbTy(kb)); } @@ -317,7 +318,8 @@ _GetKernel_ocl_impl(const kernel_bundle &kb, try { context ctx = kb.get_context(); - kernel interop_kernel = make_kernel(ocl_kernel_from_kb, ctx); + const kernel &interop_kernel = + make_kernel(ocl_kernel_from_kb, ctx); return wrap(new kernel(interop_kernel)); } catch (std::exception const &e) { @@ -473,7 +475,7 @@ _CreateKernelBundleWithIL_ze_impl(const context &SyclCtx, } try { - auto kb = make_kernel_bundle( + const auto &kb = make_kernel_bundle( {ZeModule, ext::oneapi::level_zero::ownership::keep}, SyclCtx); return wrap>( @@ -514,8 +516,8 @@ _GetKernel_ze_impl(const kernel_bundle &kb, if (ze_status == ZE_RESULT_SUCCESS) { found = true; - auto ctx = kb.get_context(); - auto k = make_kernel( + const auto &ctx = kb.get_context(); + const auto &k = make_kernel( {kb, ZeKern, ext::oneapi::level_zero::ownership::keep}, ctx); syclInteropKern_ptr = std::unique_ptr(new kernel(k)); break; diff --git a/libsyclinterface/source/dpctl_sycl_platform_interface.cpp b/libsyclinterface/source/dpctl_sycl_platform_interface.cpp index 5be98b7b61..fb0fbd6bd2 100644 --- a/libsyclinterface/source/dpctl_sycl_platform_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_platform_interface.cpp @@ -36,6 +36,7 @@ #include #include #include +#include #include using namespace sycl; @@ -224,7 +225,7 @@ DPCTLPlatform_GetDefaultContext(__dpctl_keep const DPCTLSyclPlatformRef PRef) { auto P = unwrap(PRef); if (P) { - auto default_ctx = P->ext_oneapi_get_default_context(); + const auto &default_ctx = P->ext_oneapi_get_default_context(); return wrap(new context(default_ctx)); } else { diff --git a/libsyclinterface/source/dpctl_sycl_queue_interface.cpp b/libsyclinterface/source/dpctl_sycl_queue_interface.cpp index ce318ce37e..e94cfeda25 100644 --- a/libsyclinterface/source/dpctl_sycl_queue_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_queue_interface.cpp @@ -34,6 +34,7 @@ #include /* SYCL headers */ #include #include +#include using namespace sycl; @@ -387,7 +388,7 @@ DPCTLQueue_SubmitRange(__dpctl_keep const DPCTLSyclKernelRef KRef, return nullptr; } - return wrap(new event(e)); + return wrap(new event(std::move(e))); } __dpctl_give DPCTLSyclEventRef @@ -443,7 +444,7 @@ DPCTLQueue_SubmitNDRange(__dpctl_keep const DPCTLSyclKernelRef KRef, return nullptr; } - return wrap(new event(e)); + return wrap(new event(std::move(e))); } void DPCTLQueue_Wait(__dpctl_keep DPCTLSyclQueueRef QRef) @@ -475,7 +476,7 @@ DPCTLQueue_Memcpy(__dpctl_keep const DPCTLSyclQueueRef QRef, error_handler(e, __FILE__, __func__, __LINE__); return nullptr; } - return wrap(new event(ev)); + return wrap(new event(std::move(ev))); } else { error_handler("QRef passed to memcpy was NULL.", __FILE__, __func__, @@ -529,7 +530,7 @@ DPCTLQueue_MemAdvise(__dpctl_keep DPCTLSyclQueueRef QRef, error_handler(e, __FILE__, __func__, __LINE__); return nullptr; } - return wrap(new event(ev)); + return wrap(new event(std::move(ev))); } else { error_handler("QRef passed to prefetch was NULL.", __FILE__, __func__, @@ -593,7 +594,7 @@ __dpctl_give DPCTLSyclEventRef DPCTLQueue_SubmitBarrierForEvents( return nullptr; } - return wrap(new event(e)); + return wrap(new event(std::move(e))); } else { error_handler("Argument QRef is NULL", __FILE__, __func__, __LINE__); @@ -622,7 +623,7 @@ DPCTLQueue_Memset(__dpctl_keep 
const DPCTLSyclQueueRef QRef, error_handler(e, __FILE__, __func__, __LINE__); return nullptr; } - return wrap(new event(ev)); + return wrap(new event(std::move(ev))); } else { error_handler("QRef or USMRef passed to fill8 were NULL.", __FILE__, @@ -646,7 +647,7 @@ DPCTLQueue_Fill8(__dpctl_keep const DPCTLSyclQueueRef QRef, error_handler(e, __FILE__, __func__, __LINE__); return nullptr; } - return wrap(new event(ev)); + return wrap(new event(std::move(ev))); } else { error_handler("QRef or USMRef passed to fill8 were NULL.", __FILE__, @@ -670,7 +671,7 @@ DPCTLQueue_Fill16(__dpctl_keep const DPCTLSyclQueueRef QRef, error_handler(e, __FILE__, __func__, __LINE__); return nullptr; } - return wrap(new event(ev)); + return wrap(new event(std::move(ev))); } else { error_handler("QRef or USMRef passed to fill16 were NULL.", __FILE__, @@ -694,7 +695,7 @@ DPCTLQueue_Fill32(__dpctl_keep const DPCTLSyclQueueRef QRef, error_handler(e, __FILE__, __func__, __LINE__); return nullptr; } - return wrap(new event(ev)); + return wrap(new event(std::move(ev))); } else { error_handler("QRef or USMRef passed to fill32 were NULL.", __FILE__, @@ -718,7 +719,7 @@ DPCTLQueue_Fill64(__dpctl_keep const DPCTLSyclQueueRef QRef, error_handler(e, __FILE__, __func__, __LINE__); return nullptr; } - return wrap(new event(ev)); + return wrap(new event(std::move(ev))); } else { error_handler("QRef or USMRef passed to fill64 were NULL.", __FILE__, @@ -745,7 +746,7 @@ DPCTLQueue_Fill128(__dpctl_keep const DPCTLSyclQueueRef QRef, error_handler(e, __FILE__, __func__, __LINE__); return nullptr; } - return wrap(new event(ev)); + return wrap(new event(std::move(ev))); } else { error_handler("QRef or USMRef passed to fill128 were NULL.", __FILE__, diff --git a/libsyclinterface/source/dpctl_sycl_usm_interface.cpp b/libsyclinterface/source/dpctl_sycl_usm_interface.cpp index 1b7a0c85ea..2ebae9801e 100644 --- a/libsyclinterface/source/dpctl_sycl_usm_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_usm_interface.cpp @@ -30,6 +30,7 @@ #include "dpctl_sycl_device_interface.h" #include "dpctl_sycl_type_casters.hpp" #include /* SYCL headers */ +#include using namespace sycl; @@ -221,7 +222,7 @@ DPCTLUSM_GetPointerDevice(__dpctl_keep const DPCTLSyclUSMRef MRef, auto Ptr = unwrap(MRef); auto C = unwrap(CRef); - auto Dev = get_pointer_device(Ptr, *C); + const auto &Dev = get_pointer_device(Ptr, *C); return wrap(new device(Dev)); } From 4766173f95a71a9cdd1c9f6586b2089af8a764a0 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 26 Sep 2023 05:34:17 -0500 Subject: [PATCH 5/9] Resolve COPY_INSTEAD_OF_MOVE issues in libtensor --- dpctl/apis/include/dpctl4pybind11.hpp | 13 ++++---- .../include/kernels/accumulators.hpp | 25 ++++++++------- .../include/kernels/copy_and_cast.hpp | 4 +-- .../kernels/elementwise_functions/common.hpp | 2 +- .../libtensor/include/kernels/reductions.hpp | 8 +++-- .../libtensor/include/utils/offset_utils.hpp | 2 +- .../libtensor/include/utils/strided_iters.hpp | 20 ++++++------ .../source/boolean_advanced_indexing.cpp | 8 ++--- .../libtensor/source/boolean_reductions.hpp | 2 +- .../source/copy_and_cast_usm_to_usm.cpp | 8 ++--- .../libtensor/source/copy_for_reshape.cpp | 2 +- .../tensor/libtensor/source/copy_for_roll.cpp | 4 +-- .../copy_numpy_ndarray_into_usm_ndarray.cpp | 2 +- .../source/elementwise_functions.hpp | 6 ++-- .../source/integer_advanced_indexing.cpp | 32 ++++++++++--------- dpctl/tensor/libtensor/source/repeat.cpp | 4 +-- .../libtensor/source/sum_reductions.cpp | 2 +- 
dpctl/tensor/libtensor/source/tensor_py.cpp | 21 ++++++------ dpctl/tensor/libtensor/source/triul_ctor.cpp | 6 ++-- dpctl/tensor/libtensor/source/where.cpp | 2 +- 20 files changed, 91 insertions(+), 82 deletions(-) diff --git a/dpctl/apis/include/dpctl4pybind11.hpp b/dpctl/apis/include/dpctl4pybind11.hpp index b529c41599..32fd85c82b 100644 --- a/dpctl/apis/include/dpctl4pybind11.hpp +++ b/dpctl/apis/include/dpctl4pybind11.hpp @@ -30,6 +30,7 @@ #include #include #include +#include #include namespace py = pybind11; @@ -369,19 +370,19 @@ class dpctl_capi sycl::queue q_{}; PySyclQueueObject *py_q_tmp = SyclQueue_Make(reinterpret_cast(&q_)); - py::object py_sycl_queue = py::reinterpret_steal( + const py::object &py_sycl_queue = py::reinterpret_steal( reinterpret_cast(py_q_tmp)); default_sycl_queue_ = std::shared_ptr( new py::object(py_sycl_queue), Deleter{}); py::module_ mod_memory = py::module_::import("dpctl.memory"); - py::object py_as_usm_memory = mod_memory.attr("as_usm_memory"); + const py::object &py_as_usm_memory = mod_memory.attr("as_usm_memory"); as_usm_memory_ = std::shared_ptr( new py::object{py_as_usm_memory}, Deleter{}); auto mem_kl = mod_memory.attr("MemoryUSMHost"); - py::object py_default_usm_memory = + const py::object &py_default_usm_memory = mem_kl(1, py::arg("queue") = py_sycl_queue); default_usm_memory_ = std::shared_ptr( new py::object{py_default_usm_memory}, Deleter{}); @@ -390,7 +391,7 @@ class dpctl_capi py::module_::import("dpctl.tensor._usmarray"); auto tensor_kl = mod_usmarray.attr("usm_ndarray"); - py::object py_default_usm_ndarray = + const py::object &py_default_usm_ndarray = tensor_kl(py::tuple(), py::arg("dtype") = py::str("u1"), py::arg("buffer") = py_default_usm_memory); @@ -1032,7 +1033,7 @@ namespace utils { template -sycl::event keep_args_alive(sycl::queue q, +sycl::event keep_args_alive(sycl::queue &q, const py::object (&py_objs)[num], const std::vector &depends = {}) { @@ -1043,7 +1044,7 @@ sycl::event keep_args_alive(sycl::queue q, shp_arr[i] = std::make_shared(py_objs[i]); shp_arr[i]->inc_ref(); } - cgh.host_task([=]() { + cgh.host_task([shp_arr = std::move(shp_arr)]() { py::gil_scoped_acquire acquire; for (std::size_t i = 0; i < num; ++i) { diff --git a/dpctl/tensor/libtensor/include/kernels/accumulators.hpp b/dpctl/tensor/libtensor/include/kernels/accumulators.hpp index d153a3b332..110010706c 100644 --- a/dpctl/tensor/libtensor/include/kernels/accumulators.hpp +++ b/dpctl/tensor/libtensor/include/kernels/accumulators.hpp @@ -116,19 +116,20 @@ sycl::event inclusive_scan_rec(sycl::queue &exec_q, { size_t n_groups = ceiling_quotient(n_elems, n_wi * wg_size); - sycl::event inc_scan_phase1_ev = exec_q.submit([&](sycl::handler &cgh) { - cgh.depends_on(depends); + const sycl::event &inc_scan_phase1_ev = + exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); - using slmT = sycl::local_accessor; + using slmT = sycl::local_accessor; - auto lws = sycl::range<1>(wg_size); - auto gws = sycl::range<1>(n_groups * wg_size); + auto lws = sycl::range<1>(wg_size); + auto gws = sycl::range<1>(n_groups * wg_size); - slmT slm_iscan_tmp(lws, cgh); + slmT slm_iscan_tmp(lws, cgh); cgh.parallel_for>( - sycl::nd_range<1>(gws, lws), [=](sycl::nd_item<1> it) + sycl::nd_range<1>(gws, lws), [=, slm_iscan_tmp = std::move(slm_iscan_tmp)](sycl::nd_item<1> it) { auto chunk_gid = it.get_global_id(0); auto lid = it.get_local_id(0); @@ -172,7 +173,7 @@ sycl::event inclusive_scan_rec(sycl::queue &exec_q, output[i + m_wi] = local_isum[m_wi]; } }); - }); + }); sycl::event 
out_event = inc_scan_phase1_ev; if (n_groups > 1) { @@ -203,11 +204,11 @@ sycl::event inclusive_scan_rec(sycl::queue &exec_q, sycl::event e4 = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(e3); - auto ctx = exec_q.get_context(); + const auto &ctx = exec_q.get_context(); cgh.host_task([ctx, temp]() { sycl::free(temp, ctx); }); }); - out_event = e4; + out_event = std::move(e4); } return out_event; @@ -235,7 +236,7 @@ size_t accumulate_contig_impl(sycl::queue &q, NoOpIndexer flat_indexer{}; transformerT non_zero_indicator{}; - sycl::event comp_ev = + const sycl::event &comp_ev = inclusive_scan_rec( q, n_elems, wg_size, mask_data_ptr, cumsum_data_ptr, 0, 1, @@ -321,7 +322,7 @@ size_t accumulate_strided_impl(sycl::queue &q, StridedIndexer strided_indexer{nd, 0, shape_strides}; transformerT non_zero_indicator{}; - sycl::event comp_ev = + const sycl::event &comp_ev = inclusive_scan_rec( q, n_elems, wg_size, mask_data_ptr, cumsum_data_ptr, 0, 1, diff --git a/dpctl/tensor/libtensor/include/kernels/copy_and_cast.hpp b/dpctl/tensor/libtensor/include/kernels/copy_and_cast.hpp index 0c8f4a64f7..0db1f071a1 100644 --- a/dpctl/tensor/libtensor/include/kernels/copy_and_cast.hpp +++ b/dpctl/tensor/libtensor/include/kernels/copy_and_cast.hpp @@ -483,12 +483,12 @@ template #include #include +#include #include #include "pybind11/pybind11.h" @@ -760,7 +761,8 @@ sycl::event sum_reduction_over_group_temps_strided_impl( partially_reduced_tmp + reduction_groups * iter_nelems; } - sycl::event first_reduction_ev = exec_q.submit([&](sycl::handler &cgh) { + const sycl::event &first_reduction_ev = exec_q.submit([&](sycl::handler + &cgh) { cgh.depends_on(depends); using InputIndexerT = dpctl::tensor::offset_utils::StridedIndexer; @@ -858,7 +860,7 @@ sycl::event sum_reduction_over_group_temps_strided_impl( remaining_reduction_nelems = reduction_groups_; std::swap(temp_arg, temp2_arg); - dependent_ev = partial_reduction_ev; + dependent_ev = std::move(partial_reduction_ev); } // final reduction to res @@ -915,7 +917,7 @@ sycl::event sum_reduction_over_group_temps_strided_impl( sycl::event cleanup_host_task_event = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(final_reduction_ev); - sycl::context ctx = exec_q.get_context(); + const sycl::context &ctx = exec_q.get_context(); cgh.host_task([ctx, partially_reduced_tmp] { sycl::free(partially_reduced_tmp, ctx); diff --git a/dpctl/tensor/libtensor/include/utils/offset_utils.hpp b/dpctl/tensor/libtensor/include/utils/offset_utils.hpp index 19bcf9d0a8..29517ce2c5 100644 --- a/dpctl/tensor/libtensor/include/utils/offset_utils.hpp +++ b/dpctl/tensor/libtensor/include/utils/offset_utils.hpp @@ -98,7 +98,7 @@ device_allocate_and_pack(sycl::queue q, usm_host_allocatorT usm_host_allocator(q); shT empty{0, usm_host_allocator}; - shT packed_shape_strides = detail::concat(empty, vs...); + shT packed_shape_strides = detail::concat(std::move(empty), vs...); auto packed_shape_strides_owner = std::make_shared(std::move(packed_shape_strides)); diff --git a/dpctl/tensor/libtensor/include/utils/strided_iters.hpp b/dpctl/tensor/libtensor/include/utils/strided_iters.hpp index 7cca7c7b5d..48bcab4d77 100644 --- a/dpctl/tensor/libtensor/include/utils/strided_iters.hpp +++ b/dpctl/tensor/libtensor/include/utils/strided_iters.hpp @@ -541,7 +541,7 @@ int simplify_iteration_two_strides(const int nd, } template > -std::tuple contract_iter(vecT shape, vecT strides) +std::tuple contract_iter(const vecT &shape, const vecT &strides) { const size_t dim = shape.size(); if (dim != strides.size()) 
{ @@ -560,7 +560,7 @@ std::tuple contract_iter(vecT shape, vecT strides) template > std::tuple -contract_iter2(vecT shape, vecT strides1, vecT strides2) +contract_iter2(const vecT &shape, const vecT &strides1, const vecT &strides2) { const size_t dim = shape.size(); if (dim != strides1.size() || dim != strides2.size()) { @@ -714,8 +714,10 @@ int simplify_iteration_three_strides(const int nd, } template > -std::tuple -contract_iter3(vecT shape, vecT strides1, vecT strides2, vecT strides3) +std::tuple contract_iter3(const vecT &shape, + const vecT &strides1, + const vecT &strides2, + const vecT &strides3) { const size_t dim = shape.size(); if (dim != strides1.size() || dim != strides2.size() || @@ -899,11 +901,11 @@ int simplify_iteration_four_strides(const int nd, template > std::tuple -contract_iter4(vecT shape, - vecT strides1, - vecT strides2, - vecT strides3, - vecT strides4) +contract_iter4(const vecT &shape, + const vecT &strides1, + const vecT &strides2, + const vecT &strides3, + const vecT &strides4) { const size_t dim = shape.size(); if (dim != strides1.size() || dim != strides2.size() || diff --git a/dpctl/tensor/libtensor/source/boolean_advanced_indexing.cpp b/dpctl/tensor/libtensor/source/boolean_advanced_indexing.cpp index 226272536a..4bbf964511 100644 --- a/dpctl/tensor/libtensor/source/boolean_advanced_indexing.cpp +++ b/dpctl/tensor/libtensor/source/boolean_advanced_indexing.cpp @@ -264,7 +264,7 @@ py_extract(const dpctl::tensor::usm_ndarray &src, sycl::event cleanup_tmp_allocations_ev = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(extract_ev); - auto ctx = exec_q.get_context(); + const auto &ctx = exec_q.get_context(); cgh.host_task([ctx, packed_src_shape_strides] { sycl::free(packed_src_shape_strides, ctx); }); @@ -366,7 +366,7 @@ py_extract(const dpctl::tensor::usm_ndarray &src, sycl::event cleanup_tmp_allocations_ev = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(extract_ev); - auto ctx = exec_q.get_context(); + const auto &ctx = exec_q.get_context(); cgh.host_task([ctx, packed_shapes_strides] { sycl::free(packed_shapes_strides, ctx); }); @@ -693,7 +693,7 @@ py_place(const dpctl::tensor::usm_ndarray &dst, sycl::event cleanup_tmp_allocations_ev = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(place_ev); - auto ctx = exec_q.get_context(); + const auto &ctx = exec_q.get_context(); cgh.host_task([ctx, packed_shapes_strides] { sycl::free(packed_shapes_strides, ctx); }); @@ -838,7 +838,7 @@ py_nonzero(const dpctl::tensor::usm_ndarray sycl::event temporaries_cleanup_ev = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(non_zero_indexes_ev); - auto ctx = exec_q.get_context(); + const auto &ctx = exec_q.get_context(); cgh.host_task([ctx, src_shape_device_ptr] { sycl::free(src_shape_device_ptr, ctx); }); diff --git a/dpctl/tensor/libtensor/source/boolean_reductions.hpp b/dpctl/tensor/libtensor/source/boolean_reductions.hpp index 09c95bfeae..5a0d5d381a 100644 --- a/dpctl/tensor/libtensor/source/boolean_reductions.hpp +++ b/dpctl/tensor/libtensor/source/boolean_reductions.hpp @@ -292,7 +292,7 @@ py_boolean_reduction(const dpctl::tensor::usm_ndarray &src, sycl::event temp_cleanup_ev = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(red_ev); - auto ctx = exec_q.get_context(); + const auto &ctx = exec_q.get_context(); cgh.host_task([ctx, packed_shapes_and_strides] { sycl::free(packed_shapes_and_strides, ctx); }); diff --git a/dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.cpp b/dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.cpp 
index baaf3733a0..290ab88fe8 100644 --- a/dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.cpp +++ b/dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.cpp @@ -250,15 +250,15 @@ copy_usm_ndarray_into_usm_ndarray(const dpctl::tensor::usm_ndarray &src, if (shape_strides == nullptr) { throw std::runtime_error("Unable to allocate device memory"); } - sycl::event copy_shape_ev = std::get<2>(ptr_size_event_tuple); + const sycl::event ©_shape_ev = std::get<2>(ptr_size_event_tuple); - sycl::event copy_and_cast_generic_ev = copy_and_cast_fn( + const sycl::event ©_and_cast_generic_ev = copy_and_cast_fn( exec_q, src_nelems, nd, shape_strides, src_data, src_offset, dst_data, dst_offset, depends, {copy_shape_ev}); // async free of shape_strides temporary - auto ctx = exec_q.get_context(); - auto temporaries_cleanup_ev = exec_q.submit([&](sycl::handler &cgh) { + const auto &ctx = exec_q.get_context(); + const auto &temporaries_cleanup_ev = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(copy_and_cast_generic_ev); cgh.host_task( [ctx, shape_strides]() { sycl::free(shape_strides, ctx); }); diff --git a/dpctl/tensor/libtensor/source/copy_for_reshape.cpp b/dpctl/tensor/libtensor/source/copy_for_reshape.cpp index eb2c45a0cc..c9ab58528a 100644 --- a/dpctl/tensor/libtensor/source/copy_for_reshape.cpp +++ b/dpctl/tensor/libtensor/source/copy_for_reshape.cpp @@ -158,7 +158,7 @@ copy_usm_ndarray_for_reshape(const dpctl::tensor::usm_ndarray &src, auto temporaries_cleanup_ev = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(copy_for_reshape_event); - auto ctx = exec_q.get_context(); + const auto &ctx = exec_q.get_context(); cgh.host_task( [shape_strides, ctx]() { sycl::free(shape_strides, ctx); }); }); diff --git a/dpctl/tensor/libtensor/source/copy_for_roll.cpp b/dpctl/tensor/libtensor/source/copy_for_roll.cpp index cabe904e64..cc319e6e08 100644 --- a/dpctl/tensor/libtensor/source/copy_for_roll.cpp +++ b/dpctl/tensor/libtensor/source/copy_for_roll.cpp @@ -239,7 +239,7 @@ copy_usm_ndarray_for_roll_1d(const dpctl::tensor::usm_ndarray &src, auto temporaries_cleanup_ev = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(copy_for_roll_event); - auto ctx = exec_q.get_context(); + const auto &ctx = exec_q.get_context(); cgh.host_task( [shape_strides, ctx]() { sycl::free(shape_strides, ctx); }); }); @@ -379,7 +379,7 @@ copy_usm_ndarray_for_roll_nd(const dpctl::tensor::usm_ndarray &src, auto temporaries_cleanup_ev = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(copy_for_roll_event); - auto ctx = exec_q.get_context(); + const auto &ctx = exec_q.get_context(); cgh.host_task([shape_strides_shifts, ctx]() { sycl::free(shape_strides_shifts, ctx); }); diff --git a/dpctl/tensor/libtensor/source/copy_numpy_ndarray_into_usm_ndarray.cpp b/dpctl/tensor/libtensor/source/copy_numpy_ndarray_into_usm_ndarray.cpp index 5616cfa8db..bb367a42b9 100644 --- a/dpctl/tensor/libtensor/source/copy_numpy_ndarray_into_usm_ndarray.cpp +++ b/dpctl/tensor/libtensor/source/copy_numpy_ndarray_into_usm_ndarray.cpp @@ -236,7 +236,7 @@ void copy_numpy_ndarray_into_usm_ndarray( if (shape_strides == nullptr) { throw std::runtime_error("Unable to allocate device memory"); } - sycl::event copy_shape_ev = std::get<2>(ptr_size_event_tuple); + const sycl::event ©_shape_ev = std::get<2>(ptr_size_event_tuple); // Get implementation function pointer auto copy_and_cast_from_host_blocking_fn = diff --git a/dpctl/tensor/libtensor/source/elementwise_functions.hpp b/dpctl/tensor/libtensor/source/elementwise_functions.hpp index 
58704d83de..523e4259c3 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions.hpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions.hpp @@ -227,7 +227,7 @@ py_unary_ufunc(const dpctl::tensor::usm_ndarray &src, q, host_tasks, simplified_shape, simplified_src_strides, simplified_dst_strides); py::ssize_t *shape_strides = std::get<0>(ptr_size_event_triple_); - sycl::event copy_shape_ev = std::get<2>(ptr_size_event_triple_); + const sycl::event ©_shape_ev = std::get<2>(ptr_size_event_triple_); if (shape_strides == nullptr) { throw std::runtime_error("Device memory allocation failed"); @@ -533,7 +533,7 @@ std::pair py_binary_ufunc( simplified_src2_strides, simplified_dst_strides); py::ssize_t *shape_strides = std::get<0>(ptr_sz_event_triple_); - sycl::event copy_shape_ev = std::get<2>(ptr_sz_event_triple_); + const sycl::event ©_shape_ev = std::get<2>(ptr_sz_event_triple_); if (shape_strides == nullptr) { throw std::runtime_error("Unabled to allocate device memory"); @@ -799,7 +799,7 @@ py_binary_inplace_ufunc(const dpctl::tensor::usm_ndarray &lhs, simplified_lhs_strides); py::ssize_t *shape_strides = std::get<0>(ptr_sz_event_triple_); - sycl::event copy_shape_ev = std::get<2>(ptr_sz_event_triple_); + const sycl::event ©_shape_ev = std::get<2>(ptr_sz_event_triple_); if (shape_strides == nullptr) { throw std::runtime_error("Unabled to allocate device memory"); diff --git a/dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp b/dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp index 2cb86bbee0..a17a229fc1 100644 --- a/dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp +++ b/dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp @@ -120,14 +120,14 @@ _populate_kernel_params(sycl::queue &exec_q, std::copy(ind_offsets.begin(), ind_offsets.end(), host_ind_offsets_shp->begin()); - sycl::event device_ind_ptrs_copy_ev = exec_q.copy( + const sycl::event &device_ind_ptrs_copy_ev = exec_q.copy( host_ind_ptrs_shp->data(), device_ind_ptrs, host_ind_ptrs_shp->size()); - sycl::event device_ind_sh_st_copy_ev = + const sycl::event &device_ind_sh_st_copy_ev = exec_q.copy(host_ind_sh_st_shp->data(), device_ind_sh_st, host_ind_sh_st_shp->size()); - sycl::event device_ind_offsets_copy_ev = exec_q.copy( + const sycl::event &device_ind_offsets_copy_ev = exec_q.copy( host_ind_offsets_shp->data(), device_ind_offsets, host_ind_offsets_shp->size()); @@ -173,22 +173,24 @@ _populate_kernel_params(sycl::queue &exec_q, host_along_sh_st_shp->begin() + 2 * k + ind_nd); } - sycl::event device_orthog_sh_st_copy_ev = exec_q.copy( + const sycl::event &device_orthog_sh_st_copy_ev = exec_q.copy( host_orthog_sh_st_shp->data(), device_orthog_sh_st, host_orthog_sh_st_shp->size()); - sycl::event device_along_sh_st_copy_ev = exec_q.copy( + const sycl::event &device_along_sh_st_copy_ev = exec_q.copy( host_along_sh_st_shp->data(), device_along_sh_st, host_along_sh_st_shp->size()); - sycl::event shared_ptr_cleanup_ev = exec_q.submit([&](sycl::handler &cgh) { - cgh.depends_on({device_along_sh_st_copy_ev, device_orthog_sh_st_copy_ev, - device_ind_offsets_copy_ev, device_ind_sh_st_copy_ev, - device_ind_ptrs_copy_ev}); - cgh.host_task([host_ind_offsets_shp, host_ind_sh_st_shp, - host_ind_ptrs_shp, host_orthog_sh_st_shp, - host_along_sh_st_shp]() {}); - }); + const sycl::event &shared_ptr_cleanup_ev = + exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on({device_along_sh_st_copy_ev, + device_orthog_sh_st_copy_ev, + device_ind_offsets_copy_ev, + device_ind_sh_st_copy_ev, 
device_ind_ptrs_copy_ev}); + cgh.host_task([host_ind_offsets_shp, host_ind_sh_st_shp, + host_ind_ptrs_shp, host_orthog_sh_st_shp, + host_along_sh_st_shp]() {}); + }); host_task_events.push_back(shared_ptr_cleanup_ev); std::vector sh_st_pack_deps{ @@ -523,7 +525,7 @@ usm_ndarray_take(const dpctl::tensor::usm_ndarray &src, // free packed temporaries sycl::event temporaries_cleanup_ev = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(take_generic_ev); - auto ctx = exec_q.get_context(); + const auto &ctx = exec_q.get_context(); cgh.host_task([packed_shapes_strides, packed_axes_shapes_strides, packed_ind_shapes_strides, packed_ind_ptrs, packed_ind_offsets, ctx]() { @@ -837,7 +839,7 @@ usm_ndarray_put(const dpctl::tensor::usm_ndarray &dst, // free packed temporaries sycl::event temporaries_cleanup_ev = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(put_generic_ev); - auto ctx = exec_q.get_context(); + const auto &ctx = exec_q.get_context(); cgh.host_task([packed_shapes_strides, packed_axes_shapes_strides, packed_ind_shapes_strides, packed_ind_ptrs, packed_ind_offsets, ctx]() { diff --git a/dpctl/tensor/libtensor/source/repeat.cpp b/dpctl/tensor/libtensor/source/repeat.cpp index 6402347235..0dbfb17a5d 100644 --- a/dpctl/tensor/libtensor/source/repeat.cpp +++ b/dpctl/tensor/libtensor/source/repeat.cpp @@ -327,7 +327,7 @@ py_repeat_by_sequence(const dpctl::tensor::usm_ndarray &src, sycl::event cleanup_tmp_allocations_ev = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(repeat_ev); - auto ctx = exec_q.get_context(); + const auto &ctx = exec_q.get_context(); cgh.host_task([ctx, packed_shapes_strides] { sycl::free(packed_shapes_strides, ctx); }); @@ -538,7 +538,7 @@ py_repeat_by_scalar(const dpctl::tensor::usm_ndarray &src, sycl::event cleanup_tmp_allocations_ev = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(repeat_ev); - auto ctx = exec_q.get_context(); + const auto &ctx = exec_q.get_context(); cgh.host_task([ctx, packed_shapes_strides] { sycl::free(packed_shapes_strides, ctx); }); diff --git a/dpctl/tensor/libtensor/source/sum_reductions.cpp b/dpctl/tensor/libtensor/source/sum_reductions.cpp index 9a1865d1bb..529096f5b6 100644 --- a/dpctl/tensor/libtensor/source/sum_reductions.cpp +++ b/dpctl/tensor/libtensor/source/sum_reductions.cpp @@ -406,7 +406,7 @@ std::pair py_sum_over_axis( sycl::event temp_cleanup_ev = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(comp_ev); - auto ctx = exec_q.get_context(); + const auto &ctx = exec_q.get_context(); cgh.host_task([ctx, temp_allocation_ptr] { sycl::free(temp_allocation_ptr, ctx); }); diff --git a/dpctl/tensor/libtensor/source/tensor_py.cpp b/dpctl/tensor/libtensor/source/tensor_py.cpp index fec246325c..2ce7c72add 100644 --- a/dpctl/tensor/libtensor/source/tensor_py.cpp +++ b/dpctl/tensor/libtensor/source/tensor_py.cpp @@ -338,9 +338,9 @@ PYBIND11_MODULE(_tensor_impl, m) dpctl::tensor::py_internal::default_device_index_type, "Gives default index type supported by device.", py::arg("dev")); - auto tril_fn = [](dpctl::tensor::usm_ndarray src, - dpctl::tensor::usm_ndarray dst, py::ssize_t k, - sycl::queue exec_q, + auto tril_fn = [](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, py::ssize_t k, + sycl::queue &exec_q, const std::vector depends) -> std::pair { return usm_ndarray_triul(exec_q, src, dst, 'l', k, depends); @@ -349,9 +349,9 @@ PYBIND11_MODULE(_tensor_impl, m) py::arg("dst"), py::arg("k") = 0, py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto triu_fn = 
[](dpctl::tensor::usm_ndarray src, - dpctl::tensor::usm_ndarray dst, py::ssize_t k, - sycl::queue exec_q, + auto triu_fn = [](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, py::ssize_t k, + sycl::queue &exec_q, const std::vector depends) -> std::pair { return usm_ndarray_triul(exec_q, src, dst, 'u', k, depends); @@ -371,8 +371,8 @@ PYBIND11_MODULE(_tensor_impl, m) py::arg("axis_start"), py::arg("axis_end"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto overlap = [](dpctl::tensor::usm_ndarray x1, - dpctl::tensor::usm_ndarray x2) -> bool { + auto overlap = [](const dpctl::tensor::usm_ndarray &x1, + const dpctl::tensor::usm_ndarray &x2) -> bool { auto const &overlap = MemoryOverlap(); return overlap(x1, x2); }; @@ -380,8 +380,9 @@ PYBIND11_MODULE(_tensor_impl, m) "Determines if the memory regions indexed by each array overlap", py::arg("array1"), py::arg("array2")); - auto same_logical_tensors = [](dpctl::tensor::usm_ndarray x1, - dpctl::tensor::usm_ndarray x2) -> bool { + auto same_logical_tensors = + [](const dpctl::tensor::usm_ndarray &x1, + const dpctl::tensor::usm_ndarray &x2) -> bool { auto const &same_logical_tensors = SameLogicalTensors(); return same_logical_tensors(x1, x2); }; diff --git a/dpctl/tensor/libtensor/source/triul_ctor.cpp b/dpctl/tensor/libtensor/source/triul_ctor.cpp index a245341c38..40dd5cf48a 100644 --- a/dpctl/tensor/libtensor/source/triul_ctor.cpp +++ b/dpctl/tensor/libtensor/source/triul_ctor.cpp @@ -174,7 +174,7 @@ usm_ndarray_triul(sycl::queue &exec_q, if (dev_shape_and_strides == nullptr) { throw std::runtime_error("Unabled to allocate device memory"); } - sycl::event copy_shape_and_strides = exec_q.copy( + const sycl::event ©_shape_and_strides = exec_q.copy( shp_host_shape_and_strides->data(), dev_shape_and_strides, 3 * nd); py::ssize_t inner_range = src_shape[src_nd - 1] * src_shape[src_nd - 2]; @@ -194,9 +194,9 @@ usm_ndarray_triul(sycl::queue &exec_q, dev_shape_and_strides, k, depends, {copy_shape_and_strides}); } - auto temporaries_cleanup_ev = exec_q.submit([&](sycl::handler &cgh) { + const auto &temporaries_cleanup_ev = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(tri_ev); - auto ctx = exec_q.get_context(); + const auto &ctx = exec_q.get_context(); cgh.host_task( [shp_host_shape_and_strides, dev_shape_and_strides, ctx]() { // capture of shp_host_shape_and_strides ensure the underlying diff --git a/dpctl/tensor/libtensor/source/where.cpp b/dpctl/tensor/libtensor/source/where.cpp index 9fe2be12ed..ed782bda34 100644 --- a/dpctl/tensor/libtensor/source/where.cpp +++ b/dpctl/tensor/libtensor/source/where.cpp @@ -228,7 +228,7 @@ py_where(const dpctl::tensor::usm_ndarray &condition, // free packed temporaries sycl::event temporaries_cleanup_ev = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(where_ev); - auto ctx = exec_q.get_context(); + const auto &ctx = exec_q.get_context(); cgh.host_task([packed_shape_strides, ctx]() { sycl::free(packed_shape_strides, ctx); }); From c41d09bfbdbe5a770dae0be2ef36ff397b850a88 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 26 Sep 2023 07:54:18 -0500 Subject: [PATCH 6/9] Utilities take queue as const reference --- dpctl/apis/include/dpctl4pybind11.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dpctl/apis/include/dpctl4pybind11.hpp b/dpctl/apis/include/dpctl4pybind11.hpp index 32fd85c82b..f68826af48 100644 --- a/dpctl/apis/include/dpctl4pybind11.hpp +++ b/dpctl/apis/include/dpctl4pybind11.hpp @@ -1059,7 
+1059,7 @@ sycl::event keep_args_alive(sycl::queue &q, /*! @brief Check if all allocation queues are the same as the execution queue */ template -bool queues_are_compatible(sycl::queue exec_q, +bool queues_are_compatible(const sycl::queue &exec_q, const sycl::queue (&alloc_qs)[num]) { for (std::size_t i = 0; i < num; ++i) { @@ -1074,7 +1074,7 @@ bool queues_are_compatible(sycl::queue exec_q, /*! @brief Check if all allocation queues of usm_ndarays are the same as the execution queue */ template -bool queues_are_compatible(sycl::queue exec_q, +bool queues_are_compatible(const sycl::queue &exec_q, const ::dpctl::tensor::usm_ndarray (&arrs)[num]) { for (std::size_t i = 0; i < num; ++i) { From 230b844f05c0a000d7ea4e619b0fa583cd5e5468 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 26 Sep 2023 07:56:44 -0500 Subject: [PATCH 7/9] Fixed coverity issue "unintended sign extension" --- .../kernels/elementwise_functions/common.hpp | 38 ++++++++++++------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/common.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/common.hpp index db5114e241..c0a94be341 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/common.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/common.hpp @@ -82,9 +82,11 @@ struct UnaryContigFunctor sycl::vec res_vec(const_val); #pragma unroll for (std::uint8_t it = 0; it < n_vecs * vec_sz; it += vec_sz) { + size_t offset = base + static_cast(it) * + static_cast(sgSize); auto out_multi_ptr = sycl::address_space_cast< sycl::access::address_space::global_space, - sycl::access::decorated::yes>(&out[base + it * sgSize]); + sycl::access::decorated::yes>(&out[offset]); sg.store(out_multi_ptr, res_vec); } @@ -111,12 +113,14 @@ struct UnaryContigFunctor #pragma unroll for (std::uint16_t it = 0; it < n_vecs * vec_sz; it += vec_sz) { + size_t offset = base + static_cast(it) * + static_cast(sgSize); auto in_multi_ptr = sycl::address_space_cast< sycl::access::address_space::global_space, - sycl::access::decorated::yes>(&in[base + it * sgSize]); + sycl::access::decorated::yes>(&in[offset]); auto out_multi_ptr = sycl::address_space_cast< sycl::access::address_space::global_space, - sycl::access::decorated::yes>(&out[base + it * sgSize]); + sycl::access::decorated::yes>(&out[offset]); x = sg.load(in_multi_ptr); sycl::vec res_vec = op(x); @@ -149,12 +153,14 @@ struct UnaryContigFunctor #pragma unroll for (std::uint8_t it = 0; it < n_vecs * vec_sz; it += vec_sz) { + size_t offset = base + static_cast(it) * + static_cast(sgSize); auto in_multi_ptr = sycl::address_space_cast< sycl::access::address_space::global_space, - sycl::access::decorated::yes>(&in[base + it * sgSize]); + sycl::access::decorated::yes>(&in[offset]); auto out_multi_ptr = sycl::address_space_cast< sycl::access::address_space::global_space, - sycl::access::decorated::yes>(&out[base + it * sgSize]); + sycl::access::decorated::yes>(&out[offset]); arg_vec = sg.load(in_multi_ptr); #pragma unroll @@ -188,12 +194,14 @@ struct UnaryContigFunctor #pragma unroll for (std::uint8_t it = 0; it < n_vecs * vec_sz; it += vec_sz) { + size_t offset = base + static_cast(it) * + static_cast(sgSize); auto in_multi_ptr = sycl::address_space_cast< sycl::access::address_space::global_space, - sycl::access::decorated::yes>(&in[base + it * sgSize]); + sycl::access::decorated::yes>(&in[offset]); auto out_multi_ptr = sycl::address_space_cast< 
From 230b844f05c0a000d7ea4e619b0fa583cd5e5468 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk
Date: Tue, 26 Sep 2023 07:56:44 -0500
Subject: [PATCH 7/9] Fixed coverity issue "unintended sign extension"

---
 .../kernels/elementwise_functions/common.hpp | 38 ++++++++++++-------
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/common.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/common.hpp
index db5114e241..c0a94be341 100644
--- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/common.hpp
+++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/common.hpp
@@ -82,9 +82,11 @@ struct UnaryContigFunctor
                 sycl::vec res_vec(const_val);
 #pragma unroll
                 for (std::uint8_t it = 0; it < n_vecs * vec_sz; it += vec_sz) {
+                    size_t offset = base + static_cast<size_t>(it) *
+                                               static_cast<size_t>(sgSize);
                     auto out_multi_ptr = sycl::address_space_cast<
                         sycl::access::address_space::global_space,
-                        sycl::access::decorated::yes>(&out[base + it * sgSize]);
+                        sycl::access::decorated::yes>(&out[offset]);
 
                     sg.store(out_multi_ptr, res_vec);
                 }
@@ -111,12 +113,14 @@ struct UnaryContigFunctor
 #pragma unroll
                 for (std::uint16_t it = 0; it < n_vecs * vec_sz; it += vec_sz) {
+                    size_t offset = base + static_cast<size_t>(it) *
+                                               static_cast<size_t>(sgSize);
                     auto in_multi_ptr = sycl::address_space_cast<
                         sycl::access::address_space::global_space,
-                        sycl::access::decorated::yes>(&in[base + it * sgSize]);
+                        sycl::access::decorated::yes>(&in[offset]);
                     auto out_multi_ptr = sycl::address_space_cast<
                         sycl::access::address_space::global_space,
-                        sycl::access::decorated::yes>(&out[base + it * sgSize]);
+                        sycl::access::decorated::yes>(&out[offset]);
 
                     x = sg.load(in_multi_ptr);
                     sycl::vec res_vec = op(x);
@@ -149,12 +153,14 @@ struct UnaryContigFunctor
 #pragma unroll
                 for (std::uint8_t it = 0; it < n_vecs * vec_sz; it += vec_sz) {
+                    size_t offset = base + static_cast<size_t>(it) *
+                                               static_cast<size_t>(sgSize);
                     auto in_multi_ptr = sycl::address_space_cast<
                         sycl::access::address_space::global_space,
-                        sycl::access::decorated::yes>(&in[base + it * sgSize]);
+                        sycl::access::decorated::yes>(&in[offset]);
                     auto out_multi_ptr = sycl::address_space_cast<
                         sycl::access::address_space::global_space,
-                        sycl::access::decorated::yes>(&out[base + it * sgSize]);
+                        sycl::access::decorated::yes>(&out[offset]);
 
                     arg_vec = sg.load(in_multi_ptr);
 
 #pragma unroll
@@ -188,12 +194,14 @@ struct UnaryContigFunctor
 #pragma unroll
                 for (std::uint8_t it = 0; it < n_vecs * vec_sz; it += vec_sz) {
+                    size_t offset = base + static_cast<size_t>(it) *
+                                               static_cast<size_t>(sgSize);
                     auto in_multi_ptr = sycl::address_space_cast<
                         sycl::access::address_space::global_space,
-                        sycl::access::decorated::yes>(&in[base + it * sgSize]);
+                        sycl::access::decorated::yes>(&in[offset]);
                     auto out_multi_ptr = sycl::address_space_cast<
                         sycl::access::address_space::global_space,
-                        sycl::access::decorated::yes>(&out[base + it * sgSize]);
+                        sycl::access::decorated::yes>(&out[offset]);
 
                     arg_vec = sg.load(in_multi_ptr);
 
 #pragma unroll
@@ -375,15 +383,17 @@ struct BinaryContigFunctor
 #pragma unroll
             for (std::uint8_t it = 0; it < n_vecs * vec_sz; it += vec_sz) {
+                size_t offset = base + static_cast<size_t>(it) *
+                                           static_cast<size_t>(sgSize);
                 auto in1_multi_ptr = sycl::address_space_cast<
                     sycl::access::address_space::global_space,
-                    sycl::access::decorated::yes>(&in1[base + it * sgSize]);
+                    sycl::access::decorated::yes>(&in1[offset]);
                 auto in2_multi_ptr = sycl::address_space_cast<
                     sycl::access::address_space::global_space,
-                    sycl::access::decorated::yes>(&in2[base + it * sgSize]);
+                    sycl::access::decorated::yes>(&in2[offset]);
                 auto out_multi_ptr = sycl::address_space_cast<
                     sycl::access::address_space::global_space,
-                    sycl::access::decorated::yes>(&out[base + it * sgSize]);
+                    sycl::access::decorated::yes>(&out[offset]);
 
                 arg1_vec = sg.load(in1_multi_ptr);
                 arg2_vec = sg.load(in2_multi_ptr);
@@ -415,15 +425,17 @@ struct BinaryContigFunctor
 #pragma unroll
             for (std::uint8_t it = 0; it < n_vecs * vec_sz; it += vec_sz) {
+                size_t offset = base + static_cast<size_t>(it) *
+                                           static_cast<size_t>(sgSize);
                 auto in1_multi_ptr = sycl::address_space_cast<
                     sycl::access::address_space::global_space,
-                    sycl::access::decorated::yes>(&in1[base + it * sgSize]);
+                    sycl::access::decorated::yes>(&in1[offset]);
                 auto in2_multi_ptr = sycl::address_space_cast<
                     sycl::access::address_space::global_space,
-                    sycl::access::decorated::yes>(&in2[base + it * sgSize]);
+                    sycl::access::decorated::yes>(&in2[offset]);
                 auto out_multi_ptr = sycl::address_space_cast<
                     sycl::access::address_space::global_space,
-                    sycl::access::decorated::yes>(&out[base + it * sgSize]);
+                    sycl::access::decorated::yes>(&out[offset]);
 
                 arg1_vec = sg.load(in1_multi_ptr);
                 arg2_vec = sg.load(in2_multi_ptr);
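
The new offset lines above exist so that the index arithmetic is performed in
size_t from the start: it and the sub-group size are narrow integer types, so
in the old expression &out[base + it * sgSize] the product was evaluated in a
32-bit intermediate and only afterwards widened to 64 bits, which is the
pattern Coverity reports as unintended sign extension. Casting both operands
to size_t before the multiply keeps the whole computation 64-bit. A small,
self-contained illustration of the hazard (plain C++ with exaggerated values,
not the kernel code):

    #include <cstdint>
    #include <iostream>

    int main()
    {
        const std::size_t base = 0;

        // 32-bit intermediates: the product wraps modulo 2^32 before it is
        // widened, so the final 64-bit offset is silently wrong.
        const std::uint32_t it = 70000u;
        const std::uint32_t sgSize = 70000u;
        const std::size_t wrapped = base + it * sgSize; // 605032704

        // Widen first, multiply second: the arithmetic happens in 64 bits.
        const std::size_t widened = base + static_cast<std::size_t>(it) *
                                               static_cast<std::size_t>(sgSize);
        // widened == 4900000000

        // Sign extension proper: a negative 32-bit intermediate converted to
        // an unsigned 64-bit offset becomes a huge value.
        const std::int32_t delta = -1;
        const std::size_t sign_extended = base + delta; // 18446744073709551615

        std::cout << wrapped << ' ' << widened << ' ' << sign_extended << '\n';
        return 0;
    }
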
From b9d987a258a9b6cf19e00b352b05cd3444ff3953 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk
Date: Tue, 26 Sep 2023 09:53:45 -0500
Subject: [PATCH 8/9] Fixed remaining copy-instead-of-move issue

---
 dpctl/tensor/libtensor/source/boolean_advanced_indexing.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dpctl/tensor/libtensor/source/boolean_advanced_indexing.cpp b/dpctl/tensor/libtensor/source/boolean_advanced_indexing.cpp
index 4bbf964511..ff7b32d0f7 100644
--- a/dpctl/tensor/libtensor/source/boolean_advanced_indexing.cpp
+++ b/dpctl/tensor/libtensor/source/boolean_advanced_indexing.cpp
@@ -594,7 +594,7 @@ py_place(const dpctl::tensor::usm_ndarray &dst,
     sycl::event cleanup_tmp_allocations_ev =
         exec_q.submit([&](sycl::handler &cgh) {
             cgh.depends_on(place_ev);
-            auto ctx = exec_q.get_context();
+            const auto &ctx = exec_q.get_context();
             cgh.host_task([ctx, packed_dst_shape_strides] {
                 sycl::free(packed_dst_shape_strides, ctx);
             });

From 15f9320c03e89c9612a1282c6de776eb02015667 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk
Date: Tue, 26 Sep 2023 09:54:14 -0500
Subject: [PATCH 9/9] Fixed leftover copy-instead-of-move issue

---
 libsyclinterface/source/dpctl_sycl_queue_interface.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libsyclinterface/source/dpctl_sycl_queue_interface.cpp b/libsyclinterface/source/dpctl_sycl_queue_interface.cpp
index e94cfeda25..4903b888ff 100644
--- a/libsyclinterface/source/dpctl_sycl_queue_interface.cpp
+++ b/libsyclinterface/source/dpctl_sycl_queue_interface.cpp
@@ -500,7 +500,7 @@ DPCTLQueue_Prefetch(__dpctl_keep DPCTLSyclQueueRef QRef,
             error_handler(e, __FILE__, __func__, __LINE__);
             return nullptr;
         }
-        return wrap(new event(ev));
+        return wrap(new event(std::move(ev)));
     } else {
         error_handler("Attempt to prefetch USM-allocation at nullptr.",