diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 07bd44c8fd10..da29bf31dd09 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -29,11 +29,8 @@ jobs: strategy: matrix: - python: ['3.8', '3.9'] + python: ['3.8', '3.9', '3.10'] os: [ubuntu-20.04, windows-latest] - include: - - python: '3.10' - os: ubuntu-20.04 runs-on: ${{ matrix.os }} @@ -110,7 +107,7 @@ jobs: strategy: matrix: - python: ['3.8', '3.9'] + python: ['3.8', '3.9', '3.10'] os: [ubuntu-20.04, ubuntu-latest] experimental: [false] @@ -215,7 +212,7 @@ jobs: strategy: matrix: - python: ['3.8', '3.9'] + python: ['3.8', '3.9', '3.10'] experimental: [false] continue-on-error: ${{ matrix.experimental }} @@ -384,7 +381,7 @@ jobs: strategy: matrix: - python: ['3.8', '3.9'] + python: ['3.8', '3.9', '3.10'] os: [ubuntu-20.04, windows-latest] runs-on: ${{ matrix.os }} diff --git a/0.build.sh b/0.build.sh index 380214e6802c..b1a2a29ec0ae 100755 --- a/0.build.sh +++ b/0.build.sh @@ -4,6 +4,14 @@ THEDIR=$(dirname $(readlink -e ${BASH_SOURCE[0]})) # . ${THEDIR}/0.env.sh cd ${THEDIR} +# Point the $TMP env variable at the directory where this script is located. +# The compiler uses this variable as the path to its temporary folder, +# where it stores temporary files generated during the compilation and linkage phases. +# By default the compiler uses the /tmp folder, but that is limited in size and +# there might not be enough space to temporarily keep all the generated data. 
+export TMP=${THEDIR} + + export DPNP_DEBUG=1 python setup.py clean @@ -17,7 +25,8 @@ CC=icpx python setup.py build_ext --inplace echo echo =========example3============== -icpx -fsycl -g -fPIC dpnp/backend/examples/example3.cpp -Idpnp -Idpnp/backend/include -Ldpnp -Wl,-rpath='$ORIGIN'/dpnp -ldpnp_backend_c -o example3 +DPCTL_INCLUDES=$(python -m dpctl --includes) +icpx -fsycl -g -O0 -ggdb3 -fPIC dpnp/backend/examples/example3.cpp $DPCTL_INCLUDES -Idpnp -Idpnp/backend/include -Ldpnp -Wl,-rpath='$ORIGIN'/dpnp -ldpnp_backend_c -o example3 # LD_DEBUG=libs,bindings,symbols ./example3 ./example3 @@ -39,7 +48,7 @@ icpx -fsycl -g -fPIC dpnp/backend/examples/example3.cpp -Idpnp -Idpnp/backend/in # strings /usr/share/miniconda/envs/dpnp*/lib/libstdc++.so | grep GLIBCXX | sort -n -# echo +echo echo =========example1============== # LD_DEBUG=libs,bindings,symbols python examples/example1.py # LD_DEBUG=libs python examples/example1.py diff --git a/conda-recipe/build.sh b/conda-recipe/build.sh index d873320f80f8..164ad09d578f 100644 --- a/conda-recipe/build.sh +++ b/conda-recipe/build.sh @@ -29,6 +29,11 @@ fi export CFLAGS="-Wl,-rpath,\$ORIGIN/../dpctl,-rpath,\$ORIGIN $CFLAGS" export LDFLAGS="-Wl,-rpath,\$ORIGIN/../dpctl,-rpath,\$ORIGIN $LDFLAGS" +# Intel LLVM must cooperate with compiler and sysroot from conda +echo "--gcc-toolchain=${BUILD_PREFIX} --sysroot=${BUILD_PREFIX}/${HOST}/sysroot -target ${HOST}" > icpx_for_conda.cfg +export ICPXCFG="$(pwd)/icpx_for_conda.cfg" +export ICXCFG="$(pwd)/icpx_for_conda.cfg" + $PYTHON setup.py build_clib $PYTHON setup.py build_ext install diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index b384776d2607..0c6e38f667db 100644 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -11,17 +11,17 @@ requirements: - numpy 1.19 - cython - cmake >=3.19 - - dpctl >=0.13 - - mkl-devel-dpcpp {{ environ.get('MKL_VER', '>=2021.1.1') }} + - dpctl >=0.14 + - mkl-devel-dpcpp {{ environ.get('MKL_VER', '>=2023.0.0') }} - onedpl-devel - 
tbb-devel - wheel build: - {{ compiler('cxx') }} - - {{ compiler('dpcpp') }} >=2022.1 # [not osx] + - {{ compiler('dpcpp') }} >=2023.0 # [not osx] run: - python - - dpctl >=0.13 + - dpctl >=0.14 - {{ pin_compatible('dpcpp-cpp-rt', min_pin='x.x', max_pin='x') }} - {{ pin_compatible('mkl-dpcpp', min_pin='x.x', max_pin='x') }} - {{ pin_compatible('numpy', min_pin='x.x', max_pin='x') }} diff --git a/doc/conf.py b/doc/conf.py index 46505fa8f6db..999b2504bd64 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -33,7 +33,7 @@ # The short X.Y version version = '0.11' # The full version, including alpha/beta/rc tags -release = '0.11.0' +release = '0.11.1' # -- General configuration --------------------------------------------------- diff --git a/dpnp/backend/CMakeLists.txt b/dpnp/backend/CMakeLists.txt index 1714124cf85a..baee709b11ee 100644 --- a/dpnp/backend/CMakeLists.txt +++ b/dpnp/backend/CMakeLists.txt @@ -1,5 +1,5 @@ # ***************************************************************************** -# Copyright (c) 2016-2022, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -27,7 +27,7 @@ cmake_minimum_required(VERSION 3.10 FATAL_ERROR) -# set(DPNP_VERSION 0.11.0) +# set(DPNP_VERSION 0.11.1) # set(DPNP_API_VERSION 0.11) # set directory where the custom finders live @@ -93,6 +93,7 @@ string(CONCAT COMMON_COMPILE_FLAGS "-fsycl " "-fsycl-device-code-split=per_kernel " "-fno-approx-func " + "-fno-finite-math-only " ) string(CONCAT COMMON_LINK_FLAGS "-fsycl " @@ -111,7 +112,7 @@ elseif(WIN32) # set(CMAKE_RANLIB "llvm-ranlib") # set(CMAKE_CXX_FLAGS "/EHsc") - string(APPEND COMMON_COMPILER_FLAGS + string(APPEND COMMON_COMPILE_FLAGS "/EHsc " # "/Ox " # "/W3 " @@ -133,23 +134,29 @@ string(CONCAT DPNP_WARNING_FLAGS "-Wextra " "-Wshadow " "-Wall " - "-Wstring-prototypes " + "-Wstrict-prototypes " "-Wformat " "-Wformat-security " ) -string(APPEND COMMON_COMPILER_FLAGS +string(APPEND COMMON_COMPILE_FLAGS "${DPNP_WARNING_FLAGS}" ) # debug/release compile definitions if(DPNP_DEBUG_ENABLE) set(CMAKE_BUILD_TYPE "Debug") - string(APPEND COMMON_COMPILER_FLAGS + string(APPEND COMMON_COMPILE_FLAGS "-O0 " + "-ggdb3 " + ) + string(APPEND COMMON_LINK_FLAGS + "-O0 " + "-ggdb3 " + "-fsycl-link-huge-device-code " ) else() set(CMAKE_BUILD_TYPE "Release") - string(APPEND COMMON_COMPILER_FLAGS + string(APPEND COMMON_COMPILE_FLAGS "-O3 " ) endif() @@ -162,7 +169,7 @@ string(CONCAT DPNP_DEFS "-D_FORTIFY_SOURCE=2 " ) if(NOT WIN32) - string(APPEND COMMON_COMPILER_FLAGS + string(APPEND COMMON_COMPILE_FLAGS "-fno-delete-null-pointer-checks " "-fstack-protector-strong " "-fno-strict-overflow " diff --git a/dpnp/backend/doc/Doxyfile b/dpnp/backend/doc/Doxyfile index 6c83bb0e8465..3d6c971a7991 100644 --- a/dpnp/backend/doc/Doxyfile +++ b/dpnp/backend/doc/Doxyfile @@ -38,7 +38,7 @@ PROJECT_NAME = "DPNP C++ backend kernel library" # could be handy for archiving the generated documentation or if some version # control system is used. 
-PROJECT_NUMBER = 0.11.0 +PROJECT_NUMBER = 0.11.1 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp b/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp index f5ee23d755f2..0f6cb5b31deb 100644 --- a/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp +++ b/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2020, Intel Corporation +// Copyright (c) 2016-2023, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -23,6 +23,8 @@ // THE POSSIBILITY OF SUCH DAMAGE. //***************************************************************************** +#if defined(MACRO_1ARG_1TYPE_OP) + /* * This header file contains single argument element wise functions definitions * @@ -35,10 +37,6 @@ * */ -#ifndef MACRO_1ARG_1TYPE_OP -#error "MACRO_1ARG_1TYPE_OP is not defined" -#endif - #ifdef _SECTION_DOCUMENTATION_GENERATION_ #define MACRO_1ARG_1TYPE_OP(__name__, __operation1__, __operation2__) \ @@ -88,7 +86,7 @@ const shape_elem_type* input1_strides, \ const size_t* where); -#endif +#endif // _SECTION_DOCUMENTATION_GENERATION_ MACRO_1ARG_1TYPE_OP(dpnp_conjugate_c, std::conj(input_elem), q.submit(kernel_func)) MACRO_1ARG_1TYPE_OP(dpnp_copy_c, input_elem, q.submit(kernel_func)) @@ -107,3 +105,62 @@ MACRO_1ARG_1TYPE_OP(dpnp_square_c, oneapi::mkl::vm::sqr(q, input1_size, input1_data, result)) #undef MACRO_1ARG_1TYPE_OP + +#elif defined(MACRO_1ARG_1TYPE_LOGIC_OP) + +/* + * This header file contains single argument element wise functions definitions + * + * Macro `MACRO_1ARG_1TYPE_LOGIC_OP` must be defined before usage + * + * Parameters: + * - public name of the function and kernel name + * - operation used to calculate the result + * + */ + +#ifdef 
_SECTION_DOCUMENTATION_GENERATION_ + +#define MACRO_1ARG_1TYPE_LOGIC_OP(__name__, __operation__) \ + /** @ingroup BACKEND_API */ \ + /** @brief Per element operation function __name__ */ \ + /** */ \ + /** Function "__name__" executes operator "__operation__" over corresponding elements of input array */ \ + /** */ \ + /** @param[in] q_ref Reference to SYCL queue. */ \ + /** @param[out] result_out Output array. */ \ + /** @param[in] result_size Output array size. */ \ + /** @param[in] result_ndim Number of output array dimensions. */ \ + /** @param[in] result_shape Output array shape. */ \ + /** @param[in] result_strides Output array strides. */ \ + /** @param[in] input1_in Input array 1. */ \ + /** @param[in] input1_size Input array 1 size. */ \ + /** @param[in] input1_ndim Number of input array 1 dimensions. */ \ + /** @param[in] input1_shape Input array 1 shape. */ \ + /** @param[in] input1_strides Input array 1 strides. */ \ + /** @param[in] where Where condition. */ \ + /** @param[in] dep_event_vec_ref Reference to vector of SYCL events. 
*/ \ + template \ + DPCTLSyclEventRef __name__(DPCTLSyclQueueRef q_ref, \ + void* result_out, \ + const size_t result_size, \ + const size_t result_ndim, \ + const shape_elem_type* result_shape, \ + const shape_elem_type* result_strides, \ + const void* input1_in, \ + const size_t input1_size, \ + const size_t input1_ndim, \ + const shape_elem_type* input1_shape, \ + const shape_elem_type* input1_strides, \ + const size_t* where, \ + const DPCTLEventVectorRef dep_event_vec_ref); + +#endif // _SECTION_DOCUMENTATION_GENERATION_ + +MACRO_1ARG_1TYPE_LOGIC_OP(dpnp_logical_not_c, !input1_elem) + +#undef MACRO_1ARG_1TYPE_LOGIC_OP + +#else +#error "MACRO_1ARG_1TYPE_OP or MACRO_1ARG_1TYPE_LOGIC_OP is not defined" +#endif // MACRO_1ARG_1TYPE_OP || MACRO_1ARG_1TYPE_LOGIC_OP diff --git a/dpnp/backend/include/dpnp_gen_2arg_2type_tbl.hpp b/dpnp/backend/include/dpnp_gen_2arg_2type_tbl.hpp new file mode 100644 index 000000000000..4b6c4290ef31 --- /dev/null +++ b/dpnp/backend/include/dpnp_gen_2arg_2type_tbl.hpp @@ -0,0 +1,99 @@ +//***************************************************************************** +// Copyright (c) 2023, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +/* + * This header file contains two-argument element-wise logic function definitions + * + * Macro `MACRO_2ARG_2TYPES_LOGIC_OP` must be defined before usage + * + * Parameters: + * - public name of the function and kernel name + * - operation used to calculate the result + * + */ + +#ifndef MACRO_2ARG_2TYPES_LOGIC_OP +#error "MACRO_2ARG_2TYPES_LOGIC_OP is not defined" +#endif + +#ifdef _SECTION_DOCUMENTATION_GENERATION_ + +#define MACRO_2ARG_2TYPES_LOGIC_OP(__name__, __operation__) \ + /** @ingroup BACKEND_API */ \ + /** @brief Per element operation function __name__ */ \ + /** */ \ + /** Function "__name__" executes operator "__operation__" over corresponding elements of input arrays */ \ + /** */ \ + /** @param[in] q_ref Reference to SYCL queue. */ \ + /** @param[out] result_out Output array. */ \ + /** @param[in] result_size Output array size. */ \ + /** @param[in] result_ndim Number of output array dimensions. */ \ + /** @param[in] result_shape Output array shape. */ \ + /** @param[in] result_strides Output array strides. */ \ + /** @param[in] input1_in Input array 1. */ \ + /** @param[in] input1_size Input array 1 size. */ \ + /** @param[in] input1_ndim Number of input array 1 dimensions. */ \ + /** @param[in] input1_shape Input array 1 shape. */ \ + /** @param[in] input1_strides Input array 1 strides. 
*/ \ + /** @param[in] input2_in Input array 2. */ \ + /** @param[in] input2_size Input array 2 size. */ \ + /** @param[in] input2_ndim Number of input array 2 dimensions. */ \ + /** @param[in] input2_shape Input array 2 shape. */ \ + /** @param[in] input2_strides Input array 2 strides. */ \ + /** @param[in] where Where condition. */ \ + /** @param[in] dep_event_vec_ref Reference to vector of SYCL events. */ \ + template \ + DPCTLSyclEventRef __name__(DPCTLSyclQueueRef q_ref, \ + void* result_out, \ + const size_t result_size, \ + const size_t result_ndim, \ + const shape_elem_type* result_shape, \ + const shape_elem_type* result_strides, \ + const void* input1_in, \ + const size_t input1_size, \ + const size_t input1_ndim, \ + const shape_elem_type* input1_shape, \ + const shape_elem_type* input1_strides, \ + const void* input2_in, \ + const size_t input2_size, \ + const size_t input2_ndim, \ + const shape_elem_type* input2_shape, \ + const shape_elem_type* input2_strides, \ + const size_t* where, \ + const DPCTLEventVectorRef dep_event_vec_ref); + +#endif + +MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_equal_c, input1_elem == input2_elem) +MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_greater_c, input1_elem > input2_elem) +MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_greater_equal_c, input1_elem >= input2_elem) +MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_less_c, input1_elem < input2_elem) +MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_less_equal_c, input1_elem <= input2_elem) +MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_logical_and_c, input1_elem && input2_elem) +MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_logical_or_c, input1_elem || input2_elem) +MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_logical_xor_c, (!!input1_elem) != (!!input2_elem)) +MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_not_equal_c, input1_elem != input2_elem) + +#undef MACRO_2ARG_2TYPES_LOGIC_OP diff --git a/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp b/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp index 5d4ae22f796f..e345c6eefea7 100644 --- a/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp +++ 
b/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2020, Intel Corporation +// Copyright (c) 2016-2023, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -31,7 +31,10 @@ * Parameters: * - public name of the function and kernel name * - operation used to calculate the result + * - vector operation over SYCL group used to calculate the result + * - list of types vector operation accepts * - mkl operation used to calculate the result + * - list of types mkl operation accepts * */ @@ -41,11 +44,12 @@ #ifdef _SECTION_DOCUMENTATION_GENERATION_ -#define MACRO_2ARG_3TYPES_OP(__name__, __operation1__, __operation2__) \ +#define MACRO_2ARG_3TYPES_OP( \ + __name__, __operation__, __vec_operation__, __vec_types__, __mkl_operation__, __mkl_types__) \ /** @ingroup BACKEND_API */ \ /** @brief Per element operation function __name__ */ \ /** */ \ - /** Function "__name__" executes operator "__operation1__" over corresponding elements of input arrays */ \ + /** Function "__name__" executes operator "__operation__" over corresponding elements of input arrays */ \ /** */ \ /** @param[in] q_ref Reference to SYCL queue. */ \ /** @param[out] result_out Output array. 
*/ \ @@ -105,23 +109,84 @@ #endif -MACRO_2ARG_3TYPES_OP(dpnp_add_c, input1_elem + input2_elem, oneapi::mkl::vm::add) -MACRO_2ARG_3TYPES_OP(dpnp_arctan2_c, sycl::atan2((double)input1_elem, (double)input2_elem), oneapi::mkl::vm::atan2) +MACRO_2ARG_3TYPES_OP(dpnp_add_c, + input1_elem + input2_elem, + x1 + x2, + MACRO_UNPACK_TYPES(bool, std::int32_t, std::int64_t), + oneapi::mkl::vm::add, + MACRO_UNPACK_TYPES(float, double, std::complex, std::complex)) + +MACRO_2ARG_3TYPES_OP(dpnp_arctan2_c, + sycl::atan2((double)input1_elem, (double)input2_elem), + nullptr, + std::false_type, + oneapi::mkl::vm::atan2, + MACRO_UNPACK_TYPES(float, double)) + MACRO_2ARG_3TYPES_OP(dpnp_copysign_c, sycl::copysign((double)input1_elem, (double)input2_elem), - oneapi::mkl::vm::copysign) -MACRO_2ARG_3TYPES_OP(dpnp_divide_c, input1_elem / input2_elem, oneapi::mkl::vm::div) -MACRO_2ARG_3TYPES_OP(dpnp_fmod_c, sycl::fmod((double)input1_elem, (double)input2_elem), oneapi::mkl::vm::fmod) -MACRO_2ARG_3TYPES_OP(dpnp_hypot_c, sycl::hypot((double)input1_elem, (double)input2_elem), oneapi::mkl::vm::hypot) -MACRO_2ARG_3TYPES_OP(dpnp_maximum_c, sycl::max(input1_elem, input2_elem), oneapi::mkl::vm::fmax) -MACRO_2ARG_3TYPES_OP(dpnp_minimum_c, sycl::min(input1_elem, input2_elem), oneapi::mkl::vm::fmin) + nullptr, + std::false_type, + oneapi::mkl::vm::copysign, + MACRO_UNPACK_TYPES(float, double)) + +MACRO_2ARG_3TYPES_OP(dpnp_divide_c, + input1_elem / input2_elem, + x1 / x2, + MACRO_UNPACK_TYPES(bool, std::int32_t, std::int64_t), + oneapi::mkl::vm::div, + MACRO_UNPACK_TYPES(float, double, std::complex, std::complex)) + +MACRO_2ARG_3TYPES_OP(dpnp_fmod_c, + sycl::fmod((double)input1_elem, (double)input2_elem), + nullptr, + std::false_type, + oneapi::mkl::vm::fmod, + MACRO_UNPACK_TYPES(float, double)) + +MACRO_2ARG_3TYPES_OP(dpnp_hypot_c, + sycl::hypot((double)input1_elem, (double)input2_elem), + nullptr, + std::false_type, + oneapi::mkl::vm::hypot, + MACRO_UNPACK_TYPES(float, double)) + 
+MACRO_2ARG_3TYPES_OP(dpnp_maximum_c, + sycl::max(input1_elem, input2_elem), + nullptr, + std::false_type, + oneapi::mkl::vm::fmax, + MACRO_UNPACK_TYPES(float, double)) + +MACRO_2ARG_3TYPES_OP(dpnp_minimum_c, + sycl::min(input1_elem, input2_elem), + nullptr, + std::false_type, + oneapi::mkl::vm::fmin, + MACRO_UNPACK_TYPES(float, double)) // "multiply" needs to be standalone kernel (not autogenerated) due to complex algorithm. This is not an element wise. // pytest "tests/third_party/cupy/creation_tests/test_ranges.py::TestMgrid::test_mgrid3" // requires multiplication shape1[10] with shape2[10,1] and result expected as shape[10,10] -MACRO_2ARG_3TYPES_OP(dpnp_multiply_c, input1_elem* input2_elem, oneapi::mkl::vm::mul) +MACRO_2ARG_3TYPES_OP(dpnp_multiply_c, + input1_elem * input2_elem, + x1 * x2, + MACRO_UNPACK_TYPES(bool, std::int32_t, std::int64_t), + oneapi::mkl::vm::mul, + MACRO_UNPACK_TYPES(float, double, std::complex, std::complex)) + +MACRO_2ARG_3TYPES_OP(dpnp_power_c, + sycl::pow((double)input1_elem, (double)input2_elem), + nullptr, + std::false_type, + oneapi::mkl::vm::pow, + MACRO_UNPACK_TYPES(float, double)) -MACRO_2ARG_3TYPES_OP(dpnp_power_c, sycl::pow((double)input1_elem, (double)input2_elem), oneapi::mkl::vm::pow) -MACRO_2ARG_3TYPES_OP(dpnp_subtract_c, input1_elem - input2_elem, oneapi::mkl::vm::sub) +MACRO_2ARG_3TYPES_OP(dpnp_subtract_c, + input1_elem - input2_elem, + x1 - x2, + MACRO_UNPACK_TYPES(bool, std::int32_t, std::int64_t), + oneapi::mkl::vm::sub, + MACRO_UNPACK_TYPES(float, double, std::complex, std::complex)) #undef MACRO_2ARG_3TYPES_OP diff --git a/dpnp/backend/include/dpnp_iface.hpp b/dpnp/backend/include/dpnp_iface.hpp index 42c05f0fd61d..7a80b40a3d2e 100644 --- a/dpnp/backend/include/dpnp_iface.hpp +++ b/dpnp/backend/include/dpnp_iface.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2020, Intel Corporation +// Copyright (c) 2016-2023, Intel Corporation // All 
rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -95,7 +95,7 @@ INP_DLLEXPORT void dpnp_queue_initialize_c(QueueOptions selector = QueueOptions: * @ingroup BACKEND_API * @brief SYCL queue device status. * - * Return 1 if current @ref queue is related to cpu or host device. return 0 otherwise. + * Return 1 if current @ref queue is related to cpu device. return 0 otherwise. */ INP_DLLEXPORT size_t dpnp_queue_is_cpu_c(); @@ -1806,7 +1806,31 @@ INP_DLLEXPORT void dpnp_invert_c(void* array1_in, void* result, size_t size); #include -#define MACRO_2ARG_3TYPES_OP(__name__, __operation1__, __operation2__) \ +#define MACRO_2ARG_2TYPES_LOGIC_OP(__name__, __operation__) \ + template \ + INP_DLLEXPORT DPCTLSyclEventRef __name__(DPCTLSyclQueueRef q_ref, \ + void* result_out, \ + const size_t result_size, \ + const size_t result_ndim, \ + const shape_elem_type* result_shape, \ + const shape_elem_type* result_strides, \ + const void* input1_in, \ + const size_t input1_size, \ + const size_t input1_ndim, \ + const shape_elem_type* input1_shape, \ + const shape_elem_type* input1_strides, \ + const void* input2_in, \ + const size_t input2_size, \ + const size_t input2_ndim, \ + const shape_elem_type* input2_shape, \ + const shape_elem_type* input2_strides, \ + const size_t* where, \ + const DPCTLEventVectorRef dep_event_vec_ref); + +#include + +#define MACRO_2ARG_3TYPES_OP( \ + __name__, __operation__, __vec_operation__, __vec_types__, __mkl_operation__, __mkl_types__) \ template \ INP_DLLEXPORT DPCTLSyclEventRef __name__(DPCTLSyclQueueRef q_ref, \ void* result_out, \ diff --git a/dpnp/backend/include/dpnp_iface_fptr.hpp b/dpnp/backend/include/dpnp_iface_fptr.hpp index 8e209d38317a..fb154fcabfac 100644 --- a/dpnp/backend/include/dpnp_iface_fptr.hpp +++ b/dpnp/backend/include/dpnp_iface_fptr.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2022, Intel Corporation +// 
Copyright (c) 2016-2023, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -151,10 +151,10 @@ enum class DPNPFuncName : size_t DPNP_FN_EIG_EXT, /**< Used in numpy.linalg.eig() impl, requires extra parameters */ DPNP_FN_EIGVALS, /**< Used in numpy.linalg.eigvals() impl */ DPNP_FN_EIGVALS_EXT, /**< Used in numpy.linalg.eigvals() impl, requires extra parameters */ + DPNP_FN_EQUAL_EXT, /**< Used in numpy.equal() impl, requires extra parameters */ DPNP_FN_ERF, /**< Used in scipy.special.erf impl */ DPNP_FN_ERF_EXT, /**< Used in scipy.special.erf impl, requires extra parameters */ DPNP_FN_EYE, /**< Used in numpy.eye() impl */ - DPNP_FN_EYE_EXT, /**< Used in numpy.eye() impl, requires extra parameters */ DPNP_FN_EXP, /**< Used in numpy.exp() impl */ DPNP_FN_EXP_EXT, /**< Used in numpy.exp() impl, requires extra parameters */ DPNP_FN_EXP2, /**< Used in numpy.exp2() impl */ @@ -179,6 +179,8 @@ enum class DPNPFuncName : size_t DPNP_FN_FMOD_EXT, /**< Used in numpy.fmod() impl, requires extra parameters */ DPNP_FN_FULL, /**< Used in numpy.full() impl */ DPNP_FN_FULL_LIKE, /**< Used in numpy.full_like() impl */ + DPNP_FN_GREATER_EXT, /**< Used in numpy.greater() impl, requires extra parameters */ + DPNP_FN_GREATER_EQUAL_EXT, /**< Used in numpy.greater_equal() impl, requires extra parameters */ DPNP_FN_HYPOT, /**< Used in numpy.hypot() impl */ DPNP_FN_HYPOT_EXT, /**< Used in numpy.hypot() impl, requires extra parameters */ DPNP_FN_IDENTITY, /**< Used in numpy.identity() impl */ @@ -193,6 +195,8 @@ enum class DPNPFuncName : size_t DPNP_FN_KRON_EXT, /**< Used in numpy.kron() impl, requires extra parameters */ DPNP_FN_LEFT_SHIFT, /**< Used in numpy.left_shift() impl */ DPNP_FN_LEFT_SHIFT_EXT, /**< Used in numpy.left_shift() impl, requires extra parameters */ + DPNP_FN_LESS_EXT, /**< Used in numpy.less() impl, requires extra parameters */ + DPNP_FN_LESS_EQUAL_EXT, /**< Used in numpy.less_equal() impl, requires extra 
parameters */ DPNP_FN_LOG, /**< Used in numpy.log() impl */ DPNP_FN_LOG_EXT, /**< Used in numpy.log() impl, requires extra parameters */ DPNP_FN_LOG10, /**< Used in numpy.log10() impl */ @@ -201,6 +205,10 @@ enum class DPNPFuncName : size_t DPNP_FN_LOG2_EXT, /**< Used in numpy.log2() impl, requires extra parameters */ DPNP_FN_LOG1P, /**< Used in numpy.log1p() impl */ DPNP_FN_LOG1P_EXT, /**< Used in numpy.log1p() impl, requires extra parameters */ + DPNP_FN_LOGICAL_AND_EXT, /**< Used in numpy.logical_and() impl, requires extra parameters */ + DPNP_FN_LOGICAL_NOT_EXT, /**< Used in numpy.logical_not() impl, requires extra parameters */ + DPNP_FN_LOGICAL_OR_EXT, /**< Used in numpy.logical_or() impl, requires extra parameters */ + DPNP_FN_LOGICAL_XOR_EXT, /**< Used in numpy.logical_xor() impl, requires extra parameters */ DPNP_FN_MATMUL, /**< Used in numpy.matmul() impl */ DPNP_FN_MATMUL_EXT, /**< Used in numpy.matmul() impl, requires extra parameters */ DPNP_FN_MATRIX_RANK, /**< Used in numpy.linalg.matrix_rank() impl */ @@ -227,6 +235,7 @@ enum class DPNPFuncName : size_t DPNP_FN_NEGATIVE_EXT, /**< Used in numpy.negative() impl, requires extra parameters */ DPNP_FN_NONZERO, /**< Used in numpy.nonzero() impl */ DPNP_FN_NONZERO_EXT, /**< Used in numpy.nonzero() impl, requires extra parameters */ + DPNP_FN_NOT_EQUAL_EXT, /**< Used in numpy.not_equal() impl, requires extra parameters */ DPNP_FN_ONES, /**< Used in numpy.ones() impl */ DPNP_FN_ONES_LIKE, /**< Used in numpy.ones_like() impl */ DPNP_FN_PARTITION, /**< Used in numpy.partition() impl */ @@ -361,9 +370,7 @@ enum class DPNPFuncName : size_t DPNP_FN_TRI, /**< Used in numpy.tri() impl */ DPNP_FN_TRI_EXT, /**< Used in numpy.tri() impl, requires extra parameters */ DPNP_FN_TRIL, /**< Used in numpy.tril() impl */ - DPNP_FN_TRIL_EXT, /**< Used in numpy.tril() impl, requires extra parameters */ DPNP_FN_TRIU, /**< Used in numpy.triu() impl */ - DPNP_FN_TRIU_EXT, /**< Used in numpy.triu() impl, requires extra parameters 
*/ DPNP_FN_TRUNC, /**< Used in numpy.trunc() impl */ DPNP_FN_TRUNC_EXT, /**< Used in numpy.trunc() impl, requires extra parameters */ DPNP_FN_VANDER, /**< Used in numpy.vander() impl */ @@ -385,13 +392,13 @@ enum class DPNPFuncName : size_t enum class DPNPFuncType : size_t { DPNP_FT_NONE, /**< Very first element of the enumeration */ + DPNP_FT_BOOL, /**< analog of numpy.bool_ or bool */ DPNP_FT_INT, /**< analog of numpy.int32 or int */ DPNP_FT_LONG, /**< analog of numpy.int64 or long */ DPNP_FT_FLOAT, /**< analog of numpy.float32 or float */ DPNP_FT_DOUBLE, /**< analog of numpy.float32 or double */ DPNP_FT_CMPLX64, /**< analog of numpy.complex64 or std::complex */ - DPNP_FT_CMPLX128, /**< analog of numpy.complex128 or std::complex */ - DPNP_FT_BOOL /**< analog of numpy.bool or numpy.bool_ or bool */ + DPNP_FT_CMPLX128 /**< analog of numpy.complex128 or std::complex */ }; /** @@ -410,8 +417,26 @@ size_t operator-(DPNPFuncType lhs, DPNPFuncType rhs); */ typedef struct DPNPFuncData { - DPNPFuncType return_type; /**< return type identifier which expected by the @ref ptr function */ - void* ptr; /**< C++ backend function pointer */ + DPNPFuncData(const DPNPFuncType gen_type, void* gen_ptr, const DPNPFuncType type_no_fp64, void* ptr_no_fp64) + : return_type(gen_type) + , ptr(gen_ptr) + , return_type_no_fp64(type_no_fp64) + , ptr_no_fp64(ptr_no_fp64) + { + } + DPNPFuncData(const DPNPFuncType gen_type, void* gen_ptr) + : DPNPFuncData(gen_type, gen_ptr, DPNPFuncType::DPNP_FT_NONE, nullptr) + { + } + DPNPFuncData() + : DPNPFuncData(DPNPFuncType::DPNP_FT_NONE, nullptr) + { + } + + DPNPFuncType return_type; /**< return type identifier which is expected by the @ref ptr function */ + void* ptr; /**< C++ backend function pointer */ + DPNPFuncType return_type_no_fp64; /**< alternative return type identifier used when the device has no fp64 support */ + void* ptr_no_fp64; /**< alternative C++ backend function pointer used when the device has no fp64 support */ } DPNPFuncData_t; /** diff --git 
a/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp b/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp index 71d93842feb0..a29fcca0975b 100644 --- a/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2022, Intel Corporation +// Copyright (c) 2016-2023, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -292,13 +292,6 @@ void dpnp_eye_c(void* result1, int k, const shape_elem_type* res_shape) template void (*dpnp_eye_default_c)(void*, int, const shape_elem_type*) = dpnp_eye_c<_DataType>; -template -DPCTLSyclEventRef (*dpnp_eye_ext_c)(DPCTLSyclQueueRef, - void*, - int, - const shape_elem_type*, - const DPCTLEventVectorRef) = dpnp_eye_c<_DataType>; - template DPCTLSyclEventRef dpnp_full_c(DPCTLSyclQueueRef q_ref, void* array_in, @@ -1062,17 +1055,6 @@ void (*dpnp_tril_default_c)(void*, const size_t, const size_t) = dpnp_tril_c<_DataType>; -template -DPCTLSyclEventRef (*dpnp_tril_ext_c)(DPCTLSyclQueueRef, - void*, - void*, - const int, - shape_elem_type*, - shape_elem_type*, - const size_t, - const size_t, - const DPCTLEventVectorRef) = dpnp_tril_c<_DataType>; - template DPCTLSyclEventRef dpnp_triu_c(DPCTLSyclQueueRef q_ref, void* array_in, @@ -1225,17 +1207,6 @@ void (*dpnp_triu_default_c)(void*, const size_t, const size_t) = dpnp_triu_c<_DataType>; -template -DPCTLSyclEventRef (*dpnp_triu_ext_c)(DPCTLSyclQueueRef, - void*, - void*, - const int, - shape_elem_type*, - shape_elem_type*, - const size_t, - const size_t, - const DPCTLEventVectorRef) = dpnp_triu_c<_DataType>; - template DPCTLSyclEventRef dpnp_zeros_c(DPCTLSyclQueueRef q_ref, void* result, @@ -1319,11 +1290,6 @@ void func_map_init_arraycreation(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_EYE][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_eye_default_c}; 
fmap[DPNPFuncName::DPNP_FN_EYE][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_eye_default_c}; - fmap[DPNPFuncName::DPNP_FN_EYE_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_eye_ext_c}; - fmap[DPNPFuncName::DPNP_FN_EYE_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_eye_ext_c}; - fmap[DPNPFuncName::DPNP_FN_EYE_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_eye_ext_c}; - fmap[DPNPFuncName::DPNP_FN_EYE_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_eye_ext_c}; - fmap[DPNPFuncName::DPNP_FN_FULL][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_full_default_c}; fmap[DPNPFuncName::DPNP_FN_FULL][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_full_default_c}; fmap[DPNPFuncName::DPNP_FN_FULL][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_full_default_c}; @@ -1451,21 +1417,11 @@ void func_map_init_arraycreation(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_TRIL][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_tril_default_c}; fmap[DPNPFuncName::DPNP_FN_TRIL][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_tril_default_c}; - fmap[DPNPFuncName::DPNP_FN_TRIL_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_tril_ext_c}; - fmap[DPNPFuncName::DPNP_FN_TRIL_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_tril_ext_c}; - fmap[DPNPFuncName::DPNP_FN_TRIL_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_tril_ext_c}; - fmap[DPNPFuncName::DPNP_FN_TRIL_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_tril_ext_c}; - fmap[DPNPFuncName::DPNP_FN_TRIU][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_triu_default_c}; fmap[DPNPFuncName::DPNP_FN_TRIU][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_triu_default_c}; fmap[DPNPFuncName::DPNP_FN_TRIU][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_triu_default_c}; fmap[DPNPFuncName::DPNP_FN_TRIU][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_triu_default_c}; - fmap[DPNPFuncName::DPNP_FN_TRIU_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_triu_ext_c}; - fmap[DPNPFuncName::DPNP_FN_TRIU_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_triu_ext_c}; - fmap[DPNPFuncName::DPNP_FN_TRIU_EXT][eft_FLT][eft_FLT] = {eft_FLT, 
(void*)dpnp_triu_ext_c}; - fmap[DPNPFuncName::DPNP_FN_TRIU_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_triu_ext_c}; - fmap[DPNPFuncName::DPNP_FN_ZEROS][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_zeros_default_c}; fmap[DPNPFuncName::DPNP_FN_ZEROS][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_zeros_default_c}; fmap[DPNPFuncName::DPNP_FN_ZEROS][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_zeros_default_c}; diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp index eafa50d4cee2..5133473d3935 100644 --- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2022, Intel Corporation +// Copyright (c) 2016-2023, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -825,7 +825,9 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) return; } -#define MACRO_2ARG_3TYPES_OP(__name__, __operation1__, __operation2__) \ + +#define MACRO_2ARG_3TYPES_OP( \ + __name__, __operation__, __vec_operation__, __vec_types__, __mkl_operation__, __mkl_types__) \ template \ @@ -834,6 +836,11 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) template \ + class __name__##_sg_kernel; \ + \ + template \ class __name__##_broadcast_kernel; \ \ template (q_ref)); \ \ - DPNPC_ptr_adapter<_DataType_input1> input1_ptr(q_ref, input1_in, input1_size); \ - DPNPC_ptr_adapter input1_shape_ptr(q_ref, input1_shape, input1_ndim, true); \ - DPNPC_ptr_adapter input1_strides_ptr(q_ref, input1_strides, input1_ndim, true); \ - DPNPC_ptr_adapter<_DataType_input2> input2_ptr(q_ref, input2_in, input2_size); \ - DPNPC_ptr_adapter input2_shape_ptr(q_ref, input2_shape, input2_ndim, true); \ - DPNPC_ptr_adapter input2_strides_ptr(q_ref, input2_strides, input2_ndim, true); \ - \ - DPNPC_ptr_adapter<_DataType_output> 
result_ptr(q_ref, result_out, result_size, false, true); \ - DPNPC_ptr_adapter result_shape_ptr(q_ref, result_shape, result_ndim); \ - DPNPC_ptr_adapter result_strides_ptr(q_ref, result_strides, result_ndim); \ - \ - _DataType_input1* input1_data = input1_ptr.get_ptr(); \ - shape_elem_type* input1_shape_data = input1_shape_ptr.get_ptr(); \ - shape_elem_type* input1_strides_data = input1_strides_ptr.get_ptr(); \ + _DataType_input1* input1_data = static_cast<_DataType_input1*>(const_cast(input1_in)); \ + _DataType_input2* input2_data = static_cast<_DataType_input2*>(const_cast(input2_in)); \ + _DataType_output* result = static_cast<_DataType_output*>(result_out); \ \ - _DataType_input2* input2_data = input2_ptr.get_ptr(); \ - shape_elem_type* input2_shape_data = input2_shape_ptr.get_ptr(); \ - shape_elem_type* input2_strides_data = input2_strides_ptr.get_ptr(); \ + bool use_broadcasting = !array_equal(input1_shape, input1_ndim, input2_shape, input2_ndim); \ \ - _DataType_output* result = result_ptr.get_ptr(); \ - shape_elem_type* result_shape_data = result_shape_ptr.get_ptr(); \ - shape_elem_type* result_strides_data = result_strides_ptr.get_ptr(); \ + shape_elem_type* input1_shape_offsets = new shape_elem_type[input1_ndim]; \ \ - bool use_broadcasting = !array_equal(input1_shape_data, input1_ndim, input2_shape_data, input2_ndim); \ + get_shape_offsets_inkernel(input1_shape, input1_ndim, input1_shape_offsets); \ + bool use_strides = !array_equal(input1_strides, input1_ndim, input1_shape_offsets, input1_ndim); \ + delete[] input1_shape_offsets; \ \ - const size_t input1_shape_size_in_bytes = input1_ndim * sizeof(shape_elem_type); \ - shape_elem_type* input1_shape_offsets = \ - reinterpret_cast(sycl::malloc_shared(input1_shape_size_in_bytes, q)); \ - get_shape_offsets_inkernel(input1_shape_data, input1_ndim, input1_shape_offsets); \ - bool use_strides = !array_equal(input1_strides_data, input1_ndim, input1_shape_offsets, input1_ndim); \ - 
sycl::free(input1_shape_offsets, q); \ + shape_elem_type* input2_shape_offsets = new shape_elem_type[input2_ndim]; \ \ - const size_t input2_shape_size_in_bytes = input2_ndim * sizeof(shape_elem_type); \ - shape_elem_type* input2_shape_offsets = \ - reinterpret_cast(sycl::malloc_shared(input2_shape_size_in_bytes, q)); \ - get_shape_offsets_inkernel(input2_shape_data, input2_ndim, input2_shape_offsets); \ - use_strides = \ - use_strides || !array_equal(input2_strides_data, input2_ndim, input2_shape_offsets, input2_ndim); \ - sycl::free(input2_shape_offsets, q); \ + get_shape_offsets_inkernel(input2_shape, input2_ndim, input2_shape_offsets); \ + use_strides = use_strides || !array_equal(input2_strides, input2_ndim, input2_shape_offsets, input2_ndim); \ + delete[] input2_shape_offsets; \ \ sycl::event event; \ sycl::range<1> gws(result_size); \ @@ -921,28 +906,26 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) { \ DPNPC_id<_DataType_input1>* input1_it; \ const size_t input1_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input1>); \ - input1_it = reinterpret_cast*>(dpnp_memory_alloc_c(q_ref, \ - input1_it_size_in_bytes)); \ - new (input1_it) \ - DPNPC_id<_DataType_input1>(q_ref, input1_data, input1_shape_data, input1_strides_data, input1_ndim); \ + input1_it = \ + reinterpret_cast*>(dpnp_memory_alloc_c(q_ref, input1_it_size_in_bytes)); \ + new (input1_it) DPNPC_id<_DataType_input1>(q_ref, input1_data, input1_shape, input1_strides, input1_ndim); \ \ - input1_it->broadcast_to_shape(result_shape_data, result_ndim); \ + input1_it->broadcast_to_shape(result_shape, result_ndim); \ \ DPNPC_id<_DataType_input2>* input2_it; \ const size_t input2_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input2>); \ - input2_it = reinterpret_cast*>(dpnp_memory_alloc_c(q_ref, \ - input2_it_size_in_bytes)); \ - new (input2_it) \ - DPNPC_id<_DataType_input2>(q_ref, input2_data, input2_shape_data, input2_strides_data, input2_ndim); \ + input2_it = \ + 
reinterpret_cast*>(dpnp_memory_alloc_c(q_ref, input2_it_size_in_bytes)); \ + new (input2_it) DPNPC_id<_DataType_input2>(q_ref, input2_data, input2_shape, input2_strides, input2_ndim); \ \ - input2_it->broadcast_to_shape(result_shape_data, result_ndim); \ + input2_it->broadcast_to_shape(result_shape, result_ndim); \ \ auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { \ - const size_t i = global_id[0]; /*for (size_t i = 0; i < result_size; ++i)*/ \ + const size_t i = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \ { \ const _DataType_output input1_elem = (*input1_it)[i]; \ const _DataType_output input2_elem = (*input2_it)[i]; \ - result[i] = __operation1__; \ + result[i] = __operation__; \ } \ }; \ auto kernel_func = [&](sycl::handler& cgh) { \ @@ -951,8 +934,7 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) gws, kernel_parallel_for_func); \ }; \ \ - event = q.submit(kernel_func); \ - event.wait(); \ + q.submit(kernel_func).wait(); \ \ input1_it->~DPNPC_id(); \ input2_it->~DPNPC_id(); \ @@ -961,11 +943,41 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) } \ else if (use_strides) \ { \ + if ((result_ndim != input1_ndim) || (result_ndim != input2_ndim)) \ + { \ + throw std::runtime_error("Result ndim=" + std::to_string(result_ndim) + \ + " mismatches with either input1 ndim=" + std::to_string(input1_ndim) + \ + " or input2 ndim=" + std::to_string(input2_ndim)); \ + } \ + \ + /* memory transfer optimization, use USM-host for temporary speeds up tranfer to device */ \ + using usm_host_allocatorT = sycl::usm_allocator; \ + \ + size_t strides_size = 3 * result_ndim; \ + shape_elem_type* dev_strides_data = sycl::malloc_device(strides_size, q); \ + \ + /* create host temporary for packed strides managed by shared pointer */ \ + auto strides_host_packed = \ + std::vector(strides_size, usm_host_allocatorT(q)); \ + \ + /* packed vector is concatenation of result_strides, input1_strides and input2_strides */ \ + 
std::copy(result_strides, result_strides + result_ndim, strides_host_packed.begin()); \ + std::copy(input1_strides, input1_strides + result_ndim, strides_host_packed.begin() + result_ndim); \ + std::copy(input2_strides, input2_strides + result_ndim, strides_host_packed.begin() + 2 * result_ndim); \ + \ + auto copy_strides_ev = \ + q.copy(strides_host_packed.data(), dev_strides_data, strides_host_packed.size()); \ + \ auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { \ - const size_t output_id = global_id[0]; /*for (size_t i = 0; i < result_size; ++i)*/ \ + const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \ { \ + const shape_elem_type* result_strides_data = &dev_strides_data[0]; \ + const shape_elem_type* input1_strides_data = &dev_strides_data[1]; \ + const shape_elem_type* input2_strides_data = &dev_strides_data[2]; \ + \ size_t input1_id = 0; \ size_t input2_id = 0; \ + \ for (size_t i = 0; i < result_ndim; ++i) \ { \ const size_t output_xyz_id = \ @@ -976,34 +988,118 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) \ const _DataType_output input1_elem = input1_data[input1_id]; \ const _DataType_output input2_elem = input2_data[input2_id]; \ - result[output_id] = __operation1__; \ + result[output_id] = __operation__; \ } \ }; \ auto kernel_func = [&](sycl::handler& cgh) { \ + cgh.depends_on(copy_strides_ev); \ cgh.parallel_for< \ class __name__##_strides_kernel<_DataType_output, _DataType_input1, _DataType_input2>>( \ gws, kernel_parallel_for_func); \ }; \ \ - event = q.submit(kernel_func); \ + q.submit(kernel_func).wait(); \ + \ + sycl::free(dev_strides_data, q); \ + return event_ref; \ } \ else \ { \ - if constexpr ((std::is_same<_DataType_input1, double>::value || \ - std::is_same<_DataType_input1, float>::value) && \ - std::is_same<_DataType_input2, _DataType_input1>::value) \ + if constexpr (both_types_are_same<_DataType_input1, _DataType_input2, __mkl_types__>) \ { \ - event = 
__operation2__(q, result_size, input1_data, input2_data, result); \ + event = __mkl_operation__(q, result_size, input1_data, input2_data, result); \ } \ - else \ + else if constexpr (none_of_both_types<_DataType_input1, \ + _DataType_input2, \ + std::complex, \ + std::complex>) \ { \ - auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { \ - const size_t i = global_id[0]; /*for (size_t i = 0; i < result_size; ++i)*/ \ + constexpr size_t lws = 64; \ + constexpr unsigned int vec_sz = 8; \ + constexpr sycl::access::address_space global_space = sycl::access::address_space::global_space; \ + \ + auto gws_range = sycl::range<1>(((result_size + lws * vec_sz - 1) / (lws * vec_sz)) * lws); \ + auto lws_range = sycl::range<1>(lws); \ + \ + auto kernel_parallel_for_func = [=](sycl::nd_item<1> nd_it) { \ + auto sg = nd_it.get_sub_group(); \ + const auto max_sg_size = sg.get_max_local_range()[0]; \ + const size_t start = \ + vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + sg.get_group_id()[0] * max_sg_size); \ + \ + if (start + static_cast(vec_sz) * max_sg_size < result_size) \ + { \ + using input1_ptrT = sycl::multi_ptr<_DataType_input1, global_space>; \ + using input2_ptrT = sycl::multi_ptr<_DataType_input2, global_space>; \ + using result_ptrT = sycl::multi_ptr<_DataType_output, global_space>; \ + \ + sycl::vec<_DataType_output, vec_sz> res_vec; \ + \ + if constexpr (both_types_are_any_of<_DataType_input1, _DataType_input2, __vec_types__>) \ + { \ + if constexpr (both_types_are_same<_DataType_input1, _DataType_input2, _DataType_output>) \ + { \ + sycl::vec<_DataType_input1, vec_sz> x1 = \ + sg.load(input1_ptrT(&input1_data[start])); \ + sycl::vec<_DataType_input2, vec_sz> x2 = \ + sg.load(input2_ptrT(&input2_data[start])); \ + \ + res_vec = __vec_operation__; \ + } \ + else /* input types don't match result type, so explicit casting is required */ \ + { \ + sycl::vec<_DataType_output, vec_sz> x1 = \ + dpnp_vec_cast<_DataType_output, _DataType_input1, 
vec_sz>( \ + sg.load(input1_ptrT(&input1_data[start]))); \ + sycl::vec<_DataType_output, vec_sz> x2 = \ + dpnp_vec_cast<_DataType_output, _DataType_input2, vec_sz>( \ + sg.load(input2_ptrT(&input2_data[start]))); \ + \ + res_vec = __vec_operation__; \ + } \ + } \ + else \ + { \ + sycl::vec<_DataType_input1, vec_sz> x1 = \ + sg.load(input1_ptrT(&input1_data[start])); \ + sycl::vec<_DataType_input2, vec_sz> x2 = \ + sg.load(input2_ptrT(&input2_data[start])); \ + \ + for (size_t k = 0; k < vec_sz; ++k) \ + { \ + const _DataType_output input1_elem = x1[k]; \ + const _DataType_output input2_elem = x2[k]; \ + res_vec[k] = __operation__; \ + } \ + } \ + sg.store(result_ptrT(&result[start]), res_vec); \ + } \ + else \ { \ - const _DataType_output input1_elem = input1_data[i]; \ - const _DataType_output input2_elem = input2_data[i]; \ - result[i] = __operation1__; \ + for (size_t k = start + sg.get_local_id()[0]; k < result_size; k += max_sg_size) \ + { \ + const _DataType_output input1_elem = input1_data[k]; \ + const _DataType_output input2_elem = input2_data[k]; \ + result[k] = __operation__; \ + } \ } \ + }; \ + \ + auto kernel_func = [&](sycl::handler& cgh) { \ + cgh.parallel_for< \ + class __name__##_sg_kernel<_DataType_output, _DataType_input1, _DataType_input2>>( \ + sycl::nd_range<1>(gws_range, lws_range), kernel_parallel_for_func); \ + }; \ + event = q.submit(kernel_func); \ + } \ + else /* either input1 or input2 has complex type */ \ + { \ + auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { \ + const size_t i = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \ + \ + const _DataType_output input1_elem = input1_data[i]; \ + const _DataType_output input2_elem = input2_data[i]; \ + result[i] = __operation__; \ }; \ auto kernel_func = [&](sycl::handler& cgh) { \ cgh.parallel_for>( \ @@ -1013,18 +1109,7 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) } \ } \ \ - input1_ptr.depends_on(event); \ - 
input1_shape_ptr.depends_on(event); \ - input1_strides_ptr.depends_on(event); \ - input2_ptr.depends_on(event); \ - input2_shape_ptr.depends_on(event); \ - input2_strides_ptr.depends_on(event); \ - result_ptr.depends_on(event); \ - result_shape_ptr.depends_on(event); \ - result_strides_ptr.depends_on(event); \ - \ event_ref = reinterpret_cast(&event); \ - \ return DPCTLEvent_Copy(event_ref); \ } \ \ @@ -1048,26 +1133,25 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) { \ DPCTLSyclQueueRef q_ref = reinterpret_cast(&DPNP_QUEUE); \ DPCTLEventVectorRef dep_event_vec_ref = nullptr; \ - DPCTLSyclEventRef event_ref = __name__<_DataType_output, _DataType_input1, _DataType_input2>( \ - q_ref, \ - result_out, \ - result_size, \ - result_ndim, \ - result_shape, \ - result_strides, \ - input1_in, \ - input1_size, \ - input1_ndim, \ - input1_shape, \ - input1_strides, \ - input2_in, \ - input2_size, \ - input2_ndim, \ - input2_shape, \ - input2_strides, \ - where, \ - dep_event_vec_ref \ - ); \ + DPCTLSyclEventRef event_ref = \ + __name__<_DataType_output, _DataType_input1, _DataType_input2>(q_ref, \ + result_out, \ + result_size, \ + result_ndim, \ + result_shape, \ + result_strides, \ + input1_in, \ + input1_size, \ + input1_ndim, \ + input1_shape, \ + input1_strides, \ + input2_in, \ + input2_size, \ + input2_ndim, \ + input2_shape, \ + input2_strides, \ + where, \ + dep_event_vec_ref); \ DPCTLEvent_WaitAndThrow(event_ref); \ DPCTLEvent_Delete(event_ref); \ } \ @@ -1108,12 +1192,91 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) const shape_elem_type*, \ const shape_elem_type*, \ const size_t*, \ - const DPCTLEventVectorRef) = __name__<_DataType_output, \ - _DataType_input1, \ - _DataType_input2>; + const DPCTLEventVectorRef) = \ + __name__<_DataType_output, _DataType_input1, _DataType_input2>; #include +template +static constexpr DPNPFuncType get_divide_res_type() +{ + constexpr auto widest_type = populate_func_types(); + constexpr auto 
shortes_type = (widest_type == FT1) ? FT2 : FT1; + + if constexpr (widest_type == DPNPFuncType::DPNP_FT_CMPLX128 || widest_type == DPNPFuncType::DPNP_FT_DOUBLE) + { + return widest_type; + } + else if constexpr (widest_type == DPNPFuncType::DPNP_FT_CMPLX64) + { + if constexpr (shortes_type == DPNPFuncType::DPNP_FT_DOUBLE) + { + return DPNPFuncType::DPNP_FT_CMPLX128; + } + else if constexpr (has_fp64::value && + (shortes_type == DPNPFuncType::DPNP_FT_INT || shortes_type == DPNPFuncType::DPNP_FT_LONG)) + { + return DPNPFuncType::DPNP_FT_CMPLX128; + } + } + else if constexpr (widest_type == DPNPFuncType::DPNP_FT_FLOAT) + { + if constexpr (has_fp64::value && + (shortes_type == DPNPFuncType::DPNP_FT_INT || shortes_type == DPNPFuncType::DPNP_FT_LONG)) + { + return DPNPFuncType::DPNP_FT_DOUBLE; + } + } + else if constexpr (has_fp64::value) + { + return DPNPFuncType::DPNP_FT_DOUBLE; + } + else + { + return DPNPFuncType::DPNP_FT_FLOAT; + } + return widest_type; +} + +template +static void func_map_elemwise_2arg_3type_core(func_map_t& fmap) +{ + ((fmap[DPNPFuncName::DPNP_FN_ADD_EXT][FT1][FTs] = + {populate_func_types(), + (void*)dpnp_add_c_ext()>, + func_type_map_t::find_type, + func_type_map_t::find_type>}), + ...); + ((fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][FT1][FTs] = + {populate_func_types(), + (void*)dpnp_multiply_c_ext()>, + func_type_map_t::find_type, + func_type_map_t::find_type>}), + ...); + ((fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][FT1][FTs] = + {populate_func_types(), + (void*)dpnp_subtract_c_ext()>, + func_type_map_t::find_type, + func_type_map_t::find_type>}), + ...); + ((fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][FT1][FTs] = + {get_divide_res_type(), + (void*)dpnp_divide_c_ext()>, + func_type_map_t::find_type, + func_type_map_t::find_type>, + get_divide_res_type(), + (void*)dpnp_divide_c_ext()>, + func_type_map_t::find_type, + func_type_map_t::find_type>}), + ...); +} + +template +static void func_map_elemwise_2arg_3type_helper(func_map_t& fmap) +{ + 
((func_map_elemwise_2arg_3type_core(fmap)), ...); +} + static void func_map_init_elemwise_2arg_3type(func_map_t& fmap) { fmap[DPNPFuncName::DPNP_FN_ADD][eft_INT][eft_INT] = {eft_INT, @@ -1149,39 +1312,6 @@ static void func_map_init_elemwise_2arg_3type(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_ADD][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_add_c_default}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_INT][eft_INT] = {eft_INT, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_INT][eft_LNG] = {eft_LNG, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_INT][eft_FLT] = {eft_DBL, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_INT][eft_DBL] = {eft_DBL, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_LNG][eft_INT] = {eft_LNG, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_LNG][eft_LNG] = {eft_LNG, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_LNG][eft_FLT] = {eft_DBL, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_LNG][eft_DBL] = {eft_DBL, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_FLT][eft_INT] = {eft_DBL, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_FLT][eft_LNG] = {eft_DBL, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_FLT][eft_FLT] = {eft_FLT, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_FLT][eft_DBL] = {eft_DBL, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_DBL][eft_INT] = {eft_DBL, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_DBL][eft_LNG] = {eft_DBL, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_DBL][eft_FLT] = {eft_DBL, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_DBL][eft_DBL] = {eft_DBL, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ARCTAN2][eft_INT][eft_INT] = {eft_DBL, (void*)dpnp_arctan2_c_default}; 
fmap[DPNPFuncName::DPNP_FN_ARCTAN2][eft_INT][eft_LNG] = {eft_DBL, @@ -1347,39 +1477,6 @@ static void func_map_init_elemwise_2arg_3type(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_DIVIDE][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_divide_c_default}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_INT][eft_INT] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_INT][eft_LNG] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_INT][eft_FLT] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_INT][eft_DBL] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_LNG][eft_INT] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_LNG][eft_LNG] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_LNG][eft_FLT] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_LNG][eft_DBL] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_FLT][eft_INT] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_FLT][eft_LNG] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_FLT][eft_FLT] = {eft_FLT, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_FLT][eft_DBL] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_DBL][eft_INT] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_DBL][eft_LNG] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_DBL][eft_FLT] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_DBL][eft_DBL] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_fmod_c_default}; fmap[DPNPFuncName::DPNP_FN_FMOD][eft_INT][eft_LNG] = 
{eft_LNG, @@ -1725,111 +1822,6 @@ static void func_map_init_elemwise_2arg_3type(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_C128][eft_C128] = { eft_C128, (void*)dpnp_multiply_c_default, std::complex, std::complex>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_BLN][eft_BLN] = { - eft_BLN, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_BLN][eft_INT] = { - eft_INT, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_BLN][eft_LNG] = { - eft_LNG, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_BLN][eft_FLT] = { - eft_FLT, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_BLN][eft_DBL] = { - eft_DBL, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_BLN][eft_C64] = { - eft_C64, (void*)dpnp_multiply_c_ext, bool, std::complex>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_BLN][eft_C128] = { - eft_C128, (void*)dpnp_multiply_c_ext, bool, std::complex>}; - - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_INT][eft_BLN] = { - eft_INT, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_INT][eft_INT] = { - eft_INT, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_INT][eft_LNG] = { - eft_LNG, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_INT][eft_FLT] = { - eft_FLT, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_INT][eft_DBL] = { - eft_DBL, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_INT][eft_C64] = { - eft_C64, (void*)dpnp_multiply_c_ext, int32_t, std::complex>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_INT][eft_C128] = { - eft_C128, (void*)dpnp_multiply_c_ext, int32_t, std::complex>}; - - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_LNG][eft_BLN] = { - eft_LNG, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_LNG][eft_INT] = { - eft_LNG, 
(void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_LNG][eft_LNG] = { - eft_LNG, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_LNG][eft_FLT] = { - eft_FLT, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_LNG][eft_DBL] = { - eft_DBL, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_LNG][eft_C64] = { - eft_C64, (void*)dpnp_multiply_c_ext, int64_t, std::complex>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_LNG][eft_C128] = { - eft_C128, (void*)dpnp_multiply_c_ext, int64_t, std::complex>}; - - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_FLT][eft_BLN] = { - eft_FLT, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_FLT][eft_INT] = { - eft_FLT, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_FLT][eft_LNG] = { - eft_FLT, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_FLT][eft_FLT] = { - eft_FLT, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_FLT][eft_DBL] = { - eft_DBL, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_FLT][eft_C64] = { - eft_C64, (void*)dpnp_multiply_c_ext, float, std::complex>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_FLT][eft_C128] = { - eft_C128, (void*)dpnp_multiply_c_ext, float, std::complex>}; - - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_DBL][eft_BLN] = { - eft_DBL, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_DBL][eft_INT] = { - eft_DBL, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_DBL][eft_LNG] = { - eft_DBL, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_DBL][eft_FLT] = { - eft_DBL, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_DBL][eft_DBL] = { - eft_DBL, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_DBL][eft_C64] = { - eft_C64, 
(void*)dpnp_multiply_c_ext, double, std::complex>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_DBL][eft_C128] = { - eft_C128, (void*)dpnp_multiply_c_ext, double, std::complex>}; - - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C64][eft_BLN] = { - eft_C64, (void*)dpnp_multiply_c_ext, std::complex, bool>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C64][eft_INT] = { - eft_C64, (void*)dpnp_multiply_c_ext, std::complex, int32_t>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C64][eft_LNG] = { - eft_C64, (void*)dpnp_multiply_c_ext, std::complex, int64_t>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C64][eft_FLT] = { - eft_C64, (void*)dpnp_multiply_c_ext, std::complex, float>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C64][eft_DBL] = { - eft_C64, (void*)dpnp_multiply_c_ext, std::complex, double>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C64][eft_C64] = { - eft_C64, (void*)dpnp_multiply_c_ext, std::complex, std::complex>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C64][eft_C128] = { - eft_C128, (void*)dpnp_multiply_c_ext, std::complex, std::complex>}; - - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C128][eft_BLN] = { - eft_C128, (void*)dpnp_multiply_c_ext, std::complex, bool>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C128][eft_INT] = { - eft_C128, (void*)dpnp_multiply_c_ext, std::complex, int32_t>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C128][eft_LNG] = { - eft_C128, (void*)dpnp_multiply_c_ext, std::complex, int64_t>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C128][eft_FLT] = { - eft_C128, (void*)dpnp_multiply_c_ext, std::complex, float>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C128][eft_DBL] = { - eft_C128, (void*)dpnp_multiply_c_ext, std::complex, double>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C128][eft_C64] = { - eft_C128, (void*)dpnp_multiply_c_ext, std::complex, std::complex>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C128][eft_C128] = { - eft_C128, (void*)dpnp_multiply_c_ext, 
std::complex, std::complex>}; - fmap[DPNPFuncName::DPNP_FN_POWER][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_power_c_default}; fmap[DPNPFuncName::DPNP_FN_POWER][eft_INT][eft_LNG] = {eft_LNG, @@ -1929,38 +1921,7 @@ static void func_map_init_elemwise_2arg_3type(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_SUBTRACT][eft_DBL][eft_DBL] = { eft_DBL, (void*)dpnp_subtract_c_default}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_INT][eft_INT] = { - eft_INT, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_INT][eft_LNG] = { - eft_LNG, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_INT][eft_FLT] = { - eft_DBL, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_INT][eft_DBL] = { - eft_DBL, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_LNG][eft_INT] = { - eft_LNG, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_LNG][eft_LNG] = { - eft_LNG, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_LNG][eft_FLT] = { - eft_DBL, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_LNG][eft_DBL] = { - eft_DBL, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_FLT][eft_INT] = { - eft_DBL, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_FLT][eft_LNG] = { - eft_DBL, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_FLT][eft_FLT] = { - eft_FLT, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_FLT][eft_DBL] = { - eft_DBL, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_DBL][eft_INT] = { - eft_DBL, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_DBL][eft_LNG] = { - eft_DBL, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_DBL][eft_FLT] = { - eft_DBL, (void*)dpnp_subtract_c_ext}; - 
fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_DBL][eft_DBL] = { - eft_DBL, (void*)dpnp_subtract_c_ext}; + func_map_elemwise_2arg_3type_helper(fmap); return; } diff --git a/dpnp/backend/kernels/dpnp_krnl_fft.cpp b/dpnp/backend/kernels/dpnp_krnl_fft.cpp index 3d39f2f373c7..b3f9716d73f1 100644 --- a/dpnp/backend/kernels/dpnp_krnl_fft.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_fft.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2022, Intel Corporation +// Copyright (c) 2016-2023, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -182,7 +182,10 @@ static void dpnp_fft_fft_mathlib_cmplx_to_cmplx_c(DPCTLSyclQueueRef q_ref, size_t inverse, const size_t norm) { + // avoid warning unused variable (void)result_shape; + (void)input_size; + (void)result_size; if (!shape_size) { return; @@ -253,6 +256,9 @@ static DPCTLSyclEventRef dpnp_fft_fft_mathlib_real_to_cmplx_c(DPCTLSyclQueueRef const size_t norm, const size_t real) { + // avoid warning unused variable + (void)input_size; + DPCTLSyclEventRef event_ref = nullptr; if (!shape_size) { return event_ref; diff --git a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp index 5cde013b69f8..0b80ac678d34 100644 --- a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2020, Intel Corporation +// Copyright (c) 2016-2023, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -896,6 +896,7 @@ DPCTLSyclEventRef dpnp_take_c(DPCTLSyclQueueRef q_ref, const DPCTLEventVectorRef dep_event_vec_ref) { // avoid warning unused variable + (void)array1_size; (void)dep_event_vec_ref; DPCTLSyclEventRef event_ref = nullptr; diff --git a/dpnp/backend/kernels/dpnp_krnl_logic.cpp b/dpnp/backend/kernels/dpnp_krnl_logic.cpp index 109246913589..157347aa90c0 100644 --- a/dpnp/backend/kernels/dpnp_krnl_logic.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_logic.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2020, Intel Corporation +// Copyright (c) 2016-2023, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -27,6 +27,7 @@ #include "dpnp_fptr.hpp" #include "dpnp_iface.hpp" +#include "dpnp_iterator.hpp" #include "dpnpc_memory_adapter.hpp" #include "queue_sycl.hpp" @@ -286,6 +287,457 @@ DPCTLSyclEventRef (*dpnp_any_ext_c)(DPCTLSyclQueueRef, const size_t, const DPCTLEventVectorRef) = dpnp_any_c<_DataType, _ResultType>; + +#define MACRO_1ARG_1TYPE_LOGIC_OP(__name__, __operation__) \ + template \ + class __name__##_kernel; \ + \ + template \ + class __name__##_broadcast_kernel; \ + \ + template \ + class __name__##_strides_kernel; \ + \ + template \ + DPCTLSyclEventRef __name__(DPCTLSyclQueueRef q_ref, \ + void* result_out, \ + const size_t result_size, \ + const size_t result_ndim, \ + const shape_elem_type* result_shape, \ + const shape_elem_type* result_strides, \ + const void* input1_in, \ + const size_t input1_size, \ + const size_t input1_ndim, \ + const shape_elem_type* input1_shape, \ + const shape_elem_type* input1_strides, \ + const size_t* where, \ + const DPCTLEventVectorRef dep_event_vec_ref) \ + { \ + /* avoid warning unused variable*/ \ + (void)result_shape; \ + (void)where; \ + (void)dep_event_vec_ref; \ + \ + DPCTLSyclEventRef event_ref = 
nullptr; \ + \ + if (!input1_size) \ + { \ + return event_ref; \ + } \ + \ + sycl::queue q = *(reinterpret_cast(q_ref)); \ + \ + _DataType_input1* input1_data = static_cast<_DataType_input1 *>(const_cast(input1_in)); \ + bool* result = static_cast(result_out); \ + \ + shape_elem_type* input1_shape_offsets = new shape_elem_type[input1_ndim]; \ + \ + get_shape_offsets_inkernel(input1_shape, input1_ndim, input1_shape_offsets); \ + bool use_strides = !array_equal(input1_strides, input1_ndim, input1_shape_offsets, input1_ndim); \ + delete[] input1_shape_offsets; \ + \ + if (use_strides) \ + { \ + if (result_ndim != input1_ndim) \ + { \ + throw std::runtime_error("Result ndim=" + std::to_string(result_ndim) + \ + " mismatches with input1 ndim=" + std::to_string(input1_ndim)); \ + } \ + \ + /* memory transfer optimization, use USM-host for temporary speeds up transfer to device */ \ + using usm_host_allocatorT = sycl::usm_allocator; \ + \ + size_t strides_size = 2 * result_ndim; \ + shape_elem_type *dev_strides_data = sycl::malloc_device(strides_size, q); \ + \ + /* create host temporary for packed strides managed by shared pointer */ \ + auto strides_host_packed = std::vector(strides_size, \ + usm_host_allocatorT(q)); \ + \ + /* packed vector is concatenation of result_strides and input1_strides */ \ + std::copy(result_strides, result_strides + result_ndim, strides_host_packed.begin()); \ + std::copy(input1_strides, input1_strides + result_ndim, strides_host_packed.begin() + result_ndim); \ + \ + auto copy_strides_ev = q.copy(strides_host_packed.data(), \ + dev_strides_data, \ + strides_host_packed.size()); \ + \ + auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { \ + const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \ + { \ + const shape_elem_type *result_strides_data = &dev_strides_data[0]; \ + const shape_elem_type *input1_strides_data = &dev_strides_data[result_ndim]; /* input1 strides are packed right after the result_ndim result strides */ \ + \ + size_t input1_id = 0; \ + \ + for (size_t i = 0; i < 
result_ndim; ++i) \ + { \ + const size_t output_xyz_id = \ + get_xyz_id_by_id_inkernel(output_id, result_strides_data, result_ndim, i); \ + input1_id += output_xyz_id * input1_strides_data[i]; \ + } \ + \ + const _DataType_input1 input1_elem = input1_data[input1_id]; \ + result[output_id] = __operation__; \ + } \ + }; \ + auto kernel_func = [&](sycl::handler& cgh) { \ + cgh.depends_on(copy_strides_ev); \ + cgh.parallel_for>( \ + sycl::range<1>(result_size), kernel_parallel_for_func); \ + }; \ + \ + q.submit(kernel_func).wait(); \ + \ + sycl::free(dev_strides_data, q); \ + return event_ref; \ + } \ + else \ + { \ + constexpr size_t lws = 64; \ + constexpr unsigned int vec_sz = 8; \ + constexpr sycl::access::address_space global_space = sycl::access::address_space::global_space; \ + \ + auto gws_range = sycl::range<1>(((result_size + lws * vec_sz - 1) / (lws * vec_sz)) * lws); \ + auto lws_range = sycl::range<1>(lws); \ + \ + auto kernel_parallel_for_func = [=](sycl::nd_item<1> nd_it) { \ + auto sg = nd_it.get_sub_group(); \ + const auto max_sg_size = sg.get_max_local_range()[0]; \ + const size_t start = vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + \ + sg.get_group_id()[0] * max_sg_size); \ + \ + if (start + static_cast(vec_sz) * max_sg_size < result_size) { \ + sycl::vec<_DataType_input1, vec_sz> x1 = \ + sg.load(sycl::multi_ptr<_DataType_input1, global_space>(&input1_data[start])); \ + sycl::vec res_vec; \ + \ + for (size_t k = 0; k < vec_sz; ++k) { \ + const _DataType_input1 input1_elem = x1[k]; \ + res_vec[k] = __operation__; \ + } \ + sg.store(sycl::multi_ptr(&result[start]), res_vec); \ + \ + } \ + else { \ + for (size_t k = start; k < result_size; ++k) { \ + const _DataType_input1 input1_elem = input1_data[k]; \ + result[k] = __operation__; \ + } \ + } \ + }; \ + \ + auto kernel_func = [&](sycl::handler& cgh) { \ + cgh.parallel_for>( \ + sycl::nd_range<1>(gws_range, lws_range), kernel_parallel_for_func); \ + }; \ + sycl::event event = 
q.submit(kernel_func); \ + \ + event_ref = reinterpret_cast(&event); \ + return DPCTLEvent_Copy(event_ref); \ + } \ + return event_ref; \ + } \ + \ + template \ + DPCTLSyclEventRef (*__name__##_ext)(DPCTLSyclQueueRef, \ + void*, \ + const size_t, \ + const size_t, \ + const shape_elem_type*, \ + const shape_elem_type*, \ + const void*, \ + const size_t, \ + const size_t, \ + const shape_elem_type*, \ + const shape_elem_type*, \ + const size_t*, \ + const DPCTLEventVectorRef) = __name__<_DataType_input1>; + +#include + +template +static void func_map_logic_1arg_1type_helper(func_map_t& fmap) +{ + ((fmap[DPNPFuncName::DPNP_FN_LOGICAL_NOT_EXT][FTs][FTs] = + {eft_BLN, (void*)dpnp_logical_not_c_ext>}), ...); +} + + +#define MACRO_2ARG_2TYPES_LOGIC_OP(__name__, __operation__) \ + template \ + class __name__##_kernel; \ + \ + template \ + class __name__##_broadcast_kernel; \ + \ + template \ + class __name__##_strides_kernel; \ + \ + template \ + DPCTLSyclEventRef __name__(DPCTLSyclQueueRef q_ref, \ + void* result_out, \ + const size_t result_size, \ + const size_t result_ndim, \ + const shape_elem_type* result_shape, \ + const shape_elem_type* result_strides, \ + const void* input1_in, \ + const size_t input1_size, \ + const size_t input1_ndim, \ + const shape_elem_type* input1_shape, \ + const shape_elem_type* input1_strides, \ + const void* input2_in, \ + const size_t input2_size, \ + const size_t input2_ndim, \ + const shape_elem_type* input2_shape, \ + const shape_elem_type* input2_strides, \ + const size_t* where, \ + const DPCTLEventVectorRef dep_event_vec_ref) \ + { \ + /* avoid warning unused variable*/ \ + (void)where; \ + (void)dep_event_vec_ref; \ + \ + DPCTLSyclEventRef event_ref = nullptr; \ + \ + if (!input1_size || !input2_size) \ + { \ + return event_ref; \ + } \ + \ + sycl::queue q = *(reinterpret_cast(q_ref)); \ + \ + _DataType_input1* input1_data = static_cast<_DataType_input1 *>(const_cast(input1_in)); \ + _DataType_input2* input2_data = 
static_cast<_DataType_input2 *>(const_cast(input2_in)); \ + bool* result = static_cast(result_out); \ + \ + bool use_broadcasting = !array_equal(input1_shape, input1_ndim, input2_shape, input2_ndim); \ + \ + shape_elem_type* input1_shape_offsets = new shape_elem_type[input1_ndim]; \ + \ + get_shape_offsets_inkernel(input1_shape, input1_ndim, input1_shape_offsets); \ + bool use_strides = !array_equal(input1_strides, input1_ndim, input1_shape_offsets, input1_ndim); \ + delete[] input1_shape_offsets; \ + \ + shape_elem_type* input2_shape_offsets = new shape_elem_type[input2_ndim]; \ + \ + get_shape_offsets_inkernel(input2_shape, input2_ndim, input2_shape_offsets); \ + use_strides = \ + use_strides || !array_equal(input2_strides, input2_ndim, input2_shape_offsets, input2_ndim); \ + delete[] input2_shape_offsets; \ + \ + sycl::event event; \ + sycl::range<1> gws(result_size); /* used only when use_broadcasting or use_strides is true */ \ + \ + if (use_broadcasting) \ + { \ + DPNPC_id<_DataType_input1>* input1_it; \ + const size_t input1_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input1>); \ + input1_it = reinterpret_cast*>(dpnp_memory_alloc_c(q_ref, \ + input1_it_size_in_bytes)); \ + new (input1_it) \ + DPNPC_id<_DataType_input1>(q_ref, input1_data, input1_shape, input1_strides, input1_ndim); \ + \ + input1_it->broadcast_to_shape(result_shape, result_ndim); \ + \ + DPNPC_id<_DataType_input2>* input2_it; \ + const size_t input2_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input2>); \ + input2_it = reinterpret_cast*>(dpnp_memory_alloc_c(q_ref, \ + input2_it_size_in_bytes)); \ + new (input2_it) \ + DPNPC_id<_DataType_input2>(q_ref, input2_data, input2_shape, input2_strides, input2_ndim); \ + \ + input2_it->broadcast_to_shape(result_shape, result_ndim); \ + \ + auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { \ + const size_t i = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \ + { \ + const _DataType_input1 input1_elem = (*input1_it)[i]; \ + 
const _DataType_input2 input2_elem = (*input2_it)[i]; \ + result[i] = __operation__; \ + } \ + }; \ + auto kernel_func = [&](sycl::handler& cgh) { \ + cgh.parallel_for< \ + class __name__##_broadcast_kernel<_DataType_input1, _DataType_input2>>( \ + gws, kernel_parallel_for_func); \ + }; \ + \ + q.submit(kernel_func).wait(); \ + \ + input1_it->~DPNPC_id(); \ + input2_it->~DPNPC_id(); \ + \ + return event_ref; \ + } \ + else if (use_strides) \ + { \ + if ((result_ndim != input1_ndim) || (result_ndim != input2_ndim)) \ + { \ + throw std::runtime_error("Result ndim=" + std::to_string(result_ndim) + \ + " mismatches with either input1 ndim=" + std::to_string(input1_ndim) + \ + " or input2 ndim=" + std::to_string(input2_ndim)); \ + } \ + \ + /* memory transfer optimization, use USM-host for temporary speeds up tranfer to device */ \ + using usm_host_allocatorT = sycl::usm_allocator; \ + \ + size_t strides_size = 3 * result_ndim; \ + shape_elem_type *dev_strides_data = sycl::malloc_device(strides_size, q); \ + \ + /* create host temporary for packed strides managed by shared pointer */ \ + auto strides_host_packed = std::vector(strides_size, \ + usm_host_allocatorT(q)); \ + \ + /* packed vector is concatenation of result_strides, input1_strides and input2_strides */ \ + std::copy(result_strides, result_strides + result_ndim, strides_host_packed.begin()); \ + std::copy(input1_strides, input1_strides + result_ndim, strides_host_packed.begin() + result_ndim); \ + std::copy(input2_strides, input2_strides + result_ndim, strides_host_packed.begin() + 2 * result_ndim); \ + \ + auto copy_strides_ev = q.copy(strides_host_packed.data(), \ + dev_strides_data, \ + strides_host_packed.size()); \ + \ + auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { \ + const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \ + { \ + const shape_elem_type *result_strides_data = &dev_strides_data[0]; \ + const shape_elem_type *input1_strides_data = 
&dev_strides_data[result_ndim]; \ + const shape_elem_type *input2_strides_data = &dev_strides_data[2 * result_ndim]; /* packed layout: result, input1, input2 strides, result_ndim entries each */ \ + \ + size_t input1_id = 0; \ + size_t input2_id = 0; \ + \ + for (size_t i = 0; i < result_ndim; ++i) \ + { \ + const size_t output_xyz_id = \ + get_xyz_id_by_id_inkernel(output_id, result_strides_data, result_ndim, i); \ + input1_id += output_xyz_id * input1_strides_data[i]; \ + input2_id += output_xyz_id * input2_strides_data[i]; \ + } \ + \ + const _DataType_input1 input1_elem = input1_data[input1_id]; \ + const _DataType_input2 input2_elem = input2_data[input2_id]; \ + result[output_id] = __operation__; \ + } \ + }; \ + auto kernel_func = [&](sycl::handler& cgh) { \ + cgh.depends_on(copy_strides_ev); \ + cgh.parallel_for< \ + class __name__##_strides_kernel<_DataType_input1, _DataType_input2>>( \ + gws, kernel_parallel_for_func); \ + }; \ + \ + q.submit(kernel_func).wait(); \ + \ + sycl::free(dev_strides_data, q); \ + return event_ref; \ + } \ + else \ + { \ + constexpr size_t lws = 64; \ + constexpr unsigned int vec_sz = 8; \ + constexpr sycl::access::address_space global_space = sycl::access::address_space::global_space; \ + \ + auto gws_range = sycl::range<1>(((result_size + lws * vec_sz - 1) / (lws * vec_sz)) * lws); \ + auto lws_range = sycl::range<1>(lws); \ + \ + auto kernel_parallel_for_func = [=](sycl::nd_item<1> nd_it) { \ + auto sg = nd_it.get_sub_group(); \ + const auto max_sg_size = sg.get_max_local_range()[0]; \ + const size_t start = vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + \ + sg.get_group_id()[0] * max_sg_size); \ + \ + if (start + static_cast(vec_sz) * max_sg_size < result_size) { \ + sycl::vec<_DataType_input1, vec_sz> x1 = \ + sg.load(sycl::multi_ptr<_DataType_input1, global_space>(&input1_data[start])); \ + sycl::vec<_DataType_input2, vec_sz> x2 = \ + sg.load(sycl::multi_ptr<_DataType_input2, global_space>(&input2_data[start])); \ + sycl::vec res_vec; \ + \ + for (size_t k = 0; k < vec_sz; ++k) { \ + const _DataType_input1 
input1_elem = x1[k]; \ + const _DataType_input2 input2_elem = x2[k]; \ + res_vec[k] = __operation__; \ + } \ + sg.store(sycl::multi_ptr(&result[start]), res_vec); \ + \ + } \ + else { \ + for (size_t k = start; k < result_size; ++k) { \ + const _DataType_input1 input1_elem = input1_data[k]; \ + const _DataType_input2 input2_elem = input2_data[k]; \ + result[k] = __operation__; \ + } \ + } \ + }; \ + \ + auto kernel_func = [&](sycl::handler& cgh) { \ + cgh.parallel_for>( \ + sycl::nd_range<1>(gws_range, lws_range), kernel_parallel_for_func); \ + }; \ + event = q.submit(kernel_func); \ + } \ + \ + event_ref = reinterpret_cast(&event); \ + return DPCTLEvent_Copy(event_ref); \ + } \ + \ + template \ + DPCTLSyclEventRef (*__name__##_ext)(DPCTLSyclQueueRef, \ + void*, \ + const size_t, \ + const size_t, \ + const shape_elem_type*, \ + const shape_elem_type*, \ + const void*, \ + const size_t, \ + const size_t, \ + const shape_elem_type*, \ + const shape_elem_type*, \ + const void*, \ + const size_t, \ + const size_t, \ + const shape_elem_type*, \ + const shape_elem_type*, \ + const size_t*, \ + const DPCTLEventVectorRef) = __name__<_DataType_input1, \ + _DataType_input2>; + +#include + +template +static void func_map_logic_2arg_2type_core(func_map_t& fmap) +{ + ((fmap[DPNPFuncName::DPNP_FN_EQUAL_EXT][FT1][FTs] = + {eft_BLN, (void*)dpnp_equal_c_ext, func_type_map_t::find_type>}), ...); + ((fmap[DPNPFuncName::DPNP_FN_GREATER_EXT][FT1][FTs] = + {eft_BLN, (void*)dpnp_greater_c_ext, func_type_map_t::find_type>}), ...); + ((fmap[DPNPFuncName::DPNP_FN_GREATER_EQUAL_EXT][FT1][FTs] = + {eft_BLN, (void*)dpnp_greater_equal_c_ext, func_type_map_t::find_type>}), ...); + ((fmap[DPNPFuncName::DPNP_FN_LESS_EXT][FT1][FTs] = + {eft_BLN, (void*)dpnp_less_c_ext, func_type_map_t::find_type>}), ...); + ((fmap[DPNPFuncName::DPNP_FN_LESS_EQUAL_EXT][FT1][FTs] = + {eft_BLN, (void*)dpnp_less_equal_c_ext, func_type_map_t::find_type>}), ...); + ((fmap[DPNPFuncName::DPNP_FN_LOGICAL_AND_EXT][FT1][FTs] 
= + {eft_BLN, (void*)dpnp_logical_and_c_ext, func_type_map_t::find_type>}), ...); + ((fmap[DPNPFuncName::DPNP_FN_LOGICAL_OR_EXT][FT1][FTs] = + {eft_BLN, (void*)dpnp_logical_or_c_ext, func_type_map_t::find_type>}), ...); + ((fmap[DPNPFuncName::DPNP_FN_LOGICAL_XOR_EXT][FT1][FTs] = + {eft_BLN, (void*)dpnp_logical_xor_c_ext, func_type_map_t::find_type>}), ...); + ((fmap[DPNPFuncName::DPNP_FN_NOT_EQUAL_EXT][FT1][FTs] = + {eft_BLN, (void*)dpnp_not_equal_c_ext, func_type_map_t::find_type>}), ...); +} + +template +static void func_map_logic_2arg_2type_helper(func_map_t& fmap) +{ + ((func_map_logic_2arg_2type_core(fmap)), ...); +} + void func_map_init_logic(func_map_t& fmap) { fmap[DPNPFuncName::DPNP_FN_ALL][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_all_default_c}; @@ -378,5 +830,8 @@ void func_map_init_logic(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_ANY_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_any_ext_c}; fmap[DPNPFuncName::DPNP_FN_ANY_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_any_ext_c}; + func_map_logic_1arg_1type_helper(fmap); + func_map_logic_2arg_2type_helper(fmap); + return; } diff --git a/dpnp/backend/kernels/dpnp_krnl_random.cpp b/dpnp/backend/kernels/dpnp_krnl_random.cpp index 4411e207003d..568db448d966 100644 --- a/dpnp/backend/kernels/dpnp_krnl_random.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_random.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2022, Intel Corporation +// Copyright (c) 2016-2023, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -37,6 +37,9 @@ #include "queue_sycl.hpp" #include "dpnp_random_state.hpp" +static_assert(INTEL_MKL_VERSION >= __INTEL_MKL_2023_VERSION_REQUIRED, + "MKL does not meet minimum version requirement"); + namespace mkl_blas = oneapi::mkl::blas; namespace mkl_rng = oneapi::mkl::rng; namespace mkl_vm = oneapi::mkl::vm; @@ -990,11 +993,7 @@ DPCTLSyclEventRef dpnp_rng_multinomial_c(DPCTLSyclQueueRef q_ref, DPNPC_ptr_adapter<_DataType> result_ptr(q_ref, result, size, true, true); _DataType* result1 = result_ptr.get_ptr(); -#if (INTEL_MKL_VERSION < __INTEL_MKL_2023_SWITCHOVER) - std::vector p(p_data, p_data + p_size); -#else auto p = sycl::span{p_data, p_size}; -#endif mkl_rng::multinomial<_DataType> distribution(ntrial, p); // perform generation @@ -1082,13 +1081,8 @@ DPCTLSyclEventRef dpnp_rng_multivariate_normal_c(DPCTLSyclQueueRef q_ref, _DataType* result1 = static_cast<_DataType *>(result); -#if (INTEL_MKL_VERSION < __INTEL_MKL_2023_SWITCHOVER) - std::vector mean(mean_data, mean_data + mean_size); - std::vector cov(cov_data, cov_data + cov_size); -#else auto mean = sycl::span{mean_data, mean_size}; auto cov = sycl::span{cov_data, cov_size}; -#endif // `result` is a array for random numbers // `size` is a `result`'s len. diff --git a/dpnp/backend/src/dpnp_fptr.hpp b/dpnp/backend/src/dpnp_fptr.hpp index 5b10bc71a8be..742e6dff3783 100644 --- a/dpnp/backend/src/dpnp_fptr.hpp +++ b/dpnp/backend/src/dpnp_fptr.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2020, Intel Corporation +// Copyright (c) 2016-2023, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -33,6 +33,9 @@ #define BACKEND_FPTR_H #include +#include + +#include #include @@ -64,6 +67,120 @@ const DPNPFuncType eft_C64 = DPNPFuncType::DPNP_FT_CMPLX64; const DPNPFuncType eft_C128 = DPNPFuncType::DPNP_FT_CMPLX128; const DPNPFuncType eft_BLN = DPNPFuncType::DPNP_FT_BOOL; +/** + * An internal structure to build a pair of Data type enum value with C++ type + */ +template +struct func_type_pair_t +{ + using type = T; + + static func_type_pair_t get_pair(std::integral_constant) { return {}; } +}; + +/** + * An internal structure to create a map of Data type enum value associated with C++ type + */ +template +struct func_type_map_factory_t : public Ps... +{ + using Ps::get_pair...; + + template + using find_type = typename decltype(get_pair(std::integral_constant{}))::type; +}; + +/** + * A map of the FPTR interface to link Data type enum value with associated C++ type + */ +typedef func_type_map_factory_t, + func_type_pair_t, + func_type_pair_t, + func_type_pair_t, + func_type_pair_t, + func_type_pair_t>, + func_type_pair_t>> func_type_map_t; + +/** + * Return an enum value of result type populated from input types. + */ +template +static constexpr DPNPFuncType populate_func_types() +{ + if constexpr (FT1 == DPNPFuncType::DPNP_FT_NONE) + { + throw std::runtime_error("Templated enum value of FT1 is None"); + } + else if constexpr (FT2 == DPNPFuncType::DPNP_FT_NONE) + { + throw std::runtime_error("Templated enum value of FT2 is None"); + } + return (FT1 < FT2) ? FT2 : FT1; +} + +/** + * @brief A helper function to cast SYCL vector between types. + */ +template +static auto dpnp_vec_cast_impl(const Vec& v, std::index_sequence) +{ + return Op{v[I]...}; +} + +/** + * @brief A casting function for SYCL vector. + * + * @tparam dstT A result type upon casting. + * @tparam srcT An incoming type of the vector. + * @tparam N A number of elements in the vector. 
+ * @tparam Indices A sequence of integers + * @param s An incoming SYCL vector to cast. + * @return SYCL vector cast to destination type. + */ +template > +static auto dpnp_vec_cast(const sycl::vec& s) +{ + return dpnp_vec_cast_impl, sycl::vec>(s, Indices{}); +} + +/** + * Removes parentheses for a passed list of types separated by comma. + * It's intended to be used in operations macro. + */ +#define MACRO_UNPACK_TYPES(...) __VA_ARGS__ + +/** + * Implements std::is_same<> with variadic number of types to compare with + * and when type T has to match only one of types Ts. + */ +template +struct is_any : std::disjunction...> {}; + +/** + * Implements std::is_same<> with variadic number of types to compare with + * and when type T has to match every type from Ts sequence. + */ +template +struct are_same : std::conjunction...> {}; + +/** + * A template constant to check if both types T1 and T2 match every type from Ts sequence. + */ +template +constexpr auto both_types_are_same = std::conjunction_v, are_same>; + +/** + * A template constant to check if both types T1 and T2 match any type from Ts. + */ +template +constexpr auto both_types_are_any_of = std::conjunction_v, is_any>; + +/** + * A template constant to check if both types T1 and T2 don't match any type from Ts sequence. + */ +template +constexpr auto none_of_both_types = !std::disjunction_v, is_any>; + /** * FPTR interface initialization functions */ diff --git a/dpnp/backend/src/dpnp_utils.hpp b/dpnp/backend/src/dpnp_utils.hpp index 33f4d750067f..985d5a61494e 100644 --- a/dpnp/backend/src/dpnp_utils.hpp +++ b/dpnp/backend/src/dpnp_utils.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2022, Intel Corporation +// Copyright (c) 2016-2023, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -45,15 +45,15 @@ * Intel(R) oneAPI DPC++ 2022.2.1 compiler has version 20221020L on Linux and * 20221101L on Windows. */ -#ifndef __SYCL_COMPILER_2023_SWITCHOVER -#define __SYCL_COMPILER_2023_SWITCHOVER 20221102L +#ifndef __SYCL_COMPILER_VERSION_REQUIRED +#define __SYCL_COMPILER_VERSION_REQUIRED 20221102L #endif /** * Version of Intel MKL at which transition to OneMKL release 2023.0.0 occurs. */ -#ifndef __INTEL_MKL_2023_SWITCHOVER -#define __INTEL_MKL_2023_SWITCHOVER 20230000 +#ifndef __INTEL_MKL_2023_VERSION_REQUIRED +#define __INTEL_MKL_2023_VERSION_REQUIRED 20230000 #endif /** diff --git a/dpnp/backend/src/dpnpc_memory_adapter.hpp b/dpnp/backend/src/dpnpc_memory_adapter.hpp index dab09622a698..6c81f5267787 100644 --- a/dpnp/backend/src/dpnpc_memory_adapter.hpp +++ b/dpnp/backend/src/dpnpc_memory_adapter.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2022, Intel Corporation +// Copyright (c) 2016-2023, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -85,10 +85,6 @@ class DPNPC_ptr_adapter final std::cerr << "\n\t size_in_bytes=" << size_in_bytes; std::cerr << "\n\t pointer type=" << (long)src_ptr_type; std::cerr << "\n\t queue inorder=" << queue.is_in_order(); -#if (__SYCL_COMPILER_VERSION < __SYCL_COMPILER_2023_SWITCHOVER) - std::cerr << "\n\t queue is_host=" << queue.is_host(); - std::cerr << "\n\t queue device is_host=" << queue.get_device().is_host(); -#endif std::cerr << "\n\t queue device is_cpu=" << queue.get_device().is_cpu(); std::cerr << "\n\t queue device is_gpu=" << queue.get_device().is_gpu(); std::cerr << "\n\t queue device is_accelerator=" << queue.get_device().is_accelerator(); diff --git a/dpnp/backend/src/queue_sycl.cpp b/dpnp/backend/src/queue_sycl.cpp index 0810ed0aaba8..55f78230d64e 100644 --- a/dpnp/backend/src/queue_sycl.cpp +++ b/dpnp/backend/src/queue_sycl.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2020, Intel Corporation +// Copyright (c) 2016-2023, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -215,11 +215,6 @@ bool backend_sycl::backend_sycl_is_cpu() if (qptr.get_device().is_cpu()) { return true; } -#if (__SYCL_COMPILER_VERSION < __SYCL_COMPILER_2023_SWITCHOVER) - else if (qptr.is_host() || qptr.get_device().is_host()) { - return true; - } -#endif return false; } diff --git a/dpnp/backend/src/queue_sycl.hpp b/dpnp/backend/src/queue_sycl.hpp index af03e1b6f121..8683fdd5737d 100644 --- a/dpnp/backend/src/queue_sycl.hpp +++ b/dpnp/backend/src/queue_sycl.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2020, Intel Corporation +// Copyright (c) 2016-2023, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -113,7 +113,7 @@ class backend_sycl static void backend_sycl_queue_init(QueueOptions selector = QueueOptions::CPU_SELECTOR); /** - * Return True if current @ref queue is related to cpu or host device + * Return True if current @ref queue is related to cpu device */ static bool backend_sycl_is_cpu(); @@ -137,6 +137,13 @@ class backend_sycl #else // temporal solution. Started from Sept-2020 DPCTLSyclQueueRef DPCtrl_queue = DPCTLQueueMgr_GetCurrentQueue(); + if (DPCtrl_queue == nullptr) + { + std::string reason = (DPCTLQueueMgr_GetQueueStackSize() == static_cast(-1)) + ? ": the queue stack is empty, probably no device is available." + : "."; + throw std::runtime_error("Failed to create a copy of SYCL queue with default device" + reason); + } return *(reinterpret_cast(DPCtrl_queue)); #endif } diff --git a/dpnp/dparray.pyx b/dpnp/dparray.pyx index 859bf49d59a8..dffbf6f65d15 100644 --- a/dpnp/dparray.pyx +++ b/dpnp/dparray.pyx @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2022, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -462,7 +462,7 @@ cdef class dparray: return ( < long * > self._dparray_data)[lin_idx] elif self.dtype == numpy.int32: return ( < int * > self._dparray_data)[lin_idx] - elif self.dtype == numpy.bool: + elif self.dtype == numpy.bool_: return ( < cpp_bool * > self._dparray_data)[lin_idx] elif self.dtype == numpy.complex128: return ( < double complex * > self._dparray_data)[lin_idx] @@ -489,7 +489,7 @@ cdef class dparray: ( < long * > self._dparray_data)[lin_idx] = value elif self.dtype == numpy.int32: ( < int * > self._dparray_data)[lin_idx] = value - elif self.dtype == numpy.bool: + elif self.dtype == numpy.bool_: ( < cpp_bool * > self._dparray_data)[lin_idx] = < cpp_bool > value elif self.dtype == numpy.complex64: ( < float complex * > self._dparray_data)[lin_idx] = value @@ -876,7 +876,7 @@ cdef class dparray: """ - if not numpy.issubsctype(self.dtype, numpy.complex): + if not numpy.issubsctype(self.dtype, numpy.complex_): return self else: return conjugate(self) @@ -889,7 +889,7 @@ cdef class dparray: """ - if not numpy.issubsctype(self.dtype, numpy.complex): + if not numpy.issubsctype(self.dtype, numpy.complex_): return self else: return conjugate(self) diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd index e0c82b6125ce..9bf161b0aaf7 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pxd +++ b/dpnp/dpnp_algo/dpnp_algo.pxd @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2022, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -127,6 +127,7 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_EIG_EXT DPNP_FN_EIGVALS DPNP_FN_EIGVALS_EXT + DPNP_FN_EQUAL_EXT DPNP_FN_ERF DPNP_FN_ERF_EXT DPNP_FN_EYE @@ -155,6 +156,8 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_FMOD_EXT DPNP_FN_FULL DPNP_FN_FULL_LIKE + DPNP_FN_GREATER_EXT + DPNP_FN_GREATER_EQUAL_EXT DPNP_FN_HYPOT DPNP_FN_HYPOT_EXT DPNP_FN_IDENTITY @@ -169,6 +172,8 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_KRON_EXT DPNP_FN_LEFT_SHIFT DPNP_FN_LEFT_SHIFT_EXT + DPNP_FN_LESS_EXT + DPNP_FN_LESS_EQUAL_EXT DPNP_FN_LOG DPNP_FN_LOG_EXT DPNP_FN_LOG10 @@ -177,6 +182,10 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_LOG1P_EXT DPNP_FN_LOG2 DPNP_FN_LOG2_EXT + DPNP_FN_LOGICAL_AND_EXT + DPNP_FN_LOGICAL_NOT_EXT + DPNP_FN_LOGICAL_OR_EXT + DPNP_FN_LOGICAL_XOR_EXT DPNP_FN_MATMUL DPNP_FN_MATMUL_EXT DPNP_FN_MATRIX_RANK @@ -203,6 +212,7 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_NEGATIVE_EXT DPNP_FN_NONZERO DPNP_FN_NONZERO_EXT + DPNP_FN_NOT_EQUAL_EXT DPNP_FN_ONES DPNP_FN_ONES_LIKE DPNP_FN_PARTITION @@ -364,6 +374,8 @@ cdef extern from "dpnp_iface_fptr.hpp": struct DPNPFuncData: DPNPFuncType return_type void * ptr + DPNPFuncType return_type_no_fp64 + void *ptr_no_fp64 DPNPFuncData get_dpnp_function_ptr(DPNPFuncName name, DPNPFuncType first_type, DPNPFuncType second_type) except + @@ -379,7 +391,7 @@ cdef extern from "constants.hpp": cdef extern from "dpnp_iface.hpp": void dpnp_queue_initialize_c(QueueOptions selector) - size_t dpnp_queue_is_cpu_c() + size_t dpnp_queue_is_cpu_c() except + char * dpnp_memory_alloc_c(size_t size_in_bytes) except + void dpnp_memory_free_c(void * ptr) @@ -429,7 +441,7 @@ ctypedef 
c_dpctl.DPCTLSyclEventRef(*fptr_2in_1out_strides_t)(c_dpctl.DPCTLSyclQu const shape_elem_type * , const shape_elem_type * , const long * , - const c_dpctl.DPCTLEventVectorRef) + const c_dpctl.DPCTLEventVectorRef) except + ctypedef void(*fptr_blas_gemm_2in_1out_t)(void *, void * , void * , size_t, size_t, size_t) ctypedef c_dpctl.DPCTLSyclEventRef(*dpnp_reduction_c_t)(c_dpctl.DPCTLSyclQueueRef, void *, diff --git a/dpnp/dpnp_algo/dpnp_algo.pyx b/dpnp/dpnp_algo/dpnp_algo.pyx index 41f0c0c01026..f12707ccc761 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pyx +++ b/dpnp/dpnp_algo/dpnp_algo.pyx @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2022, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -221,7 +221,7 @@ cpdef dpnp_queue_initialize(): cpdef dpnp_queue_is_cpu(): - """Return 1 if current queue is CPU or HOST. Return 0 otherwise. + """Return 1 if current queue is CPU. Return 0 otherwise. 
""" return dpnp_queue_is_cpu_c() @@ -276,7 +276,7 @@ cdef dpnp_DPNPFuncType_to_dtype(size_t type): elif type == DPNP_FT_CMPLX128: return numpy.complex128 elif type == DPNP_FT_BOOL: - return numpy.bool + return numpy.bool_ else: utils.checker_throw_type_error("dpnp_DPNPFuncType_to_dtype", type) @@ -481,8 +481,6 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name, # get the FPTR data structure cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(fptr_name, x1_c_type, x2_c_type) - result_type = dpnp_DPNPFuncType_to_dtype( < size_t > kernel_data.return_type) - # Create result array cdef shape_type_c x1_shape = x1_obj.shape @@ -495,15 +493,26 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name, result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(x1_obj, x2_obj) + # get FPTR function and return type + cdef fptr_2in_1out_strides_t func = NULL + cdef DPNPFuncType return_type = DPNP_FT_NONE + if fptr_name != DPNP_FN_DIVIDE_EXT or result_sycl_device.has_aspect_fp64: + return_type = kernel_data.return_type + func = < fptr_2in_1out_strides_t > kernel_data.ptr + else: + return_type = kernel_data.return_type_no_fp64 + func = < fptr_2in_1out_strides_t > kernel_data.ptr_no_fp64 + if out is None: """ Create result array with type given by FPTR data """ result = utils.create_output_descriptor(result_shape, - kernel_data.return_type, + return_type, None, device=result_sycl_device, usm_type=result_usm_type, sycl_queue=result_sycl_queue) else: + result_type = dpnp_DPNPFuncType_to_dtype(< size_t > return_type) if out.dtype != result_type: utils.checker_throw_value_error(func_name, 'out.dtype', out.dtype, result_type) if out.shape != result_shape: @@ -517,11 +526,10 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name, result_obj = result.get_array() - cdef c_dpctl.SyclQueue q = result_obj.sycl_queue + cdef c_dpctl.SyclQueue q = < c_dpctl.SyclQueue > 
result_obj.sycl_queue cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() """ Call FPTR function """ - cdef fptr_2in_1out_strides_t func = kernel_data.ptr cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), result.size, diff --git a/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx b/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx index c1c24a27747b..cb44a08db598 100644 --- a/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2022, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -37,7 +37,6 @@ and the rest of the library __all__ += [ "dpnp_copy", "dpnp_diag", - "dpnp_eye", "dpnp_geomspace", "dpnp_identity", "dpnp_linspace", @@ -46,8 +45,6 @@ __all__ += [ "dpnp_ptp", "dpnp_trace", "dpnp_tri", - "dpnp_tril", - "dpnp_triu", "dpnp_vander", ] @@ -84,9 +81,6 @@ ctypedef c_dpctl.DPCTLSyclEventRef(*custom_indexing_1out_func_ptr_t)(c_dpctl.DPC const size_t , const int, const c_dpctl.DPCTLEventVectorRef) except + -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_eye_t)(c_dpctl.DPCTLSyclQueueRef, - void *, int , const shape_elem_type * , - const c_dpctl.DPCTLEventVectorRef) ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_trace_t)(c_dpctl.DPCTLSyclQueueRef, const void *, void * , @@ -146,36 +140,6 @@ cpdef utils.dpnp_descriptor dpnp_diag(utils.dpnp_descriptor v, int k): return result -cpdef utils.dpnp_descriptor dpnp_eye(N, M=None, k=0, dtype=None): - if dtype is None: - dtype = dpnp.float64 - - if M is None: - M = N - - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dtype) - - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_EYE_EXT, param1_type, param1_type) - - cdef utils.dpnp_descriptor result = 
utils.create_output_descriptor((N, M), kernel_data.return_type, None) - - result_sycl_queue = result.get_array().sycl_queue - - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - cdef fptr_dpnp_eye_t func = kernel_data.ptr - - cdef shape_type_c result_shape = result.shape - - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), k, result_shape.data(), NULL) - - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) - c_dpctl.DPCTLEvent_Delete(event_ref) - - return result - - cpdef utils.dpnp_descriptor dpnp_geomspace(start, stop, num, endpoint, dtype, axis): cdef shape_type_c obj_shape = utils._object_to_tuple(num) cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(obj_shape, dtype, None) @@ -434,7 +398,7 @@ cpdef utils.dpnp_descriptor dpnp_trace(utils.dpnp_descriptor arr, offset=0, axis return result -cpdef utils.dpnp_descriptor dpnp_tri(N, M=None, k=0, dtype=numpy.float): +cpdef utils.dpnp_descriptor dpnp_tri(N, M=None, k=0, dtype=dpnp.float): if M is None: M = N @@ -460,94 +424,6 @@ cpdef utils.dpnp_descriptor dpnp_tri(N, M=None, k=0, dtype=numpy.float): return result -cpdef utils.dpnp_descriptor dpnp_tril(utils.dpnp_descriptor m, int k): - cdef shape_type_c input_shape = m.shape - cdef shape_type_c result_shape - - if m.ndim == 1: - result_shape = (m.shape[0], m.shape[0]) - else: - result_shape = m.shape - - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(m.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_TRIL_EXT, param1_type, param1_type) - - m_obj = m.get_array() - - # ceate result array with type given by FPTR data - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, - kernel_data.return_type, - None, - device=m_obj.sycl_device, - usm_type=m_obj.usm_type, - sycl_queue=m_obj.sycl_queue) - - result_sycl_queue = result.get_array().sycl_queue - - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef 
c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - cdef custom_1in_1out_func_ptr_t func = kernel_data.ptr - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, - m.get_data(), - result.get_data(), - k, - input_shape.data(), - result_shape.data(), - m.ndim, - result.ndim, - NULL) # dep_events_ref - - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) - c_dpctl.DPCTLEvent_Delete(event_ref) - - return result - - -cpdef utils.dpnp_descriptor dpnp_triu(utils.dpnp_descriptor m, int k): - cdef shape_type_c input_shape = m.shape - cdef shape_type_c result_shape - - if m.ndim == 1: - result_shape = (m.shape[0], m.shape[0]) - else: - result_shape = m.shape - - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(m.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_TRIU_EXT, param1_type, param1_type) - - m_obj = m.get_array() - - # ceate result array with type given by FPTR data - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, - kernel_data.return_type, - None, - device=m_obj.sycl_device, - usm_type=m_obj.usm_type, - sycl_queue=m_obj.sycl_queue) - - result_sycl_queue = result.get_array().sycl_queue - - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - cdef custom_1in_1out_func_ptr_t func = kernel_data.ptr - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, - m.get_data(), - result.get_data(), - k, - input_shape.data(), - result_shape.data(), - m.ndim, - result.ndim, - NULL) # dep_events_ref - - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) - c_dpctl.DPCTLEvent_Delete(event_ref) - - return result - - cpdef utils.dpnp_descriptor dpnp_vander(utils.dpnp_descriptor x1, int N, int increasing): cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype) cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_VANDER_EXT, param1_type, DPNP_FT_NONE) diff --git a/dpnp/dpnp_algo/dpnp_algo_logic.pyx 
b/dpnp/dpnp_algo/dpnp_algo_logic.pyx index e0b928ddf025..b6ac36db412b 100644 --- a/dpnp/dpnp_algo/dpnp_algo_logic.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_logic.pyx @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -166,46 +166,28 @@ cpdef utils.dpnp_descriptor dpnp_any(utils.dpnp_descriptor array1): return result -cpdef utils.dpnp_descriptor dpnp_equal(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2): - result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2) - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, - dpnp.bool, - None, - device=result_sycl_device, - usm_type=result_usm_type, - sycl_queue=result_sycl_queue) - for i in range(result.size): - result.get_pyobj()[i] = dpnp.bool(input1.get_pyobj()[i] == input2.get_pyobj()[i]) - - return result - - -cpdef utils.dpnp_descriptor dpnp_greater(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2): - result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2) - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, - dpnp.bool, - None, - device=result_sycl_device, - usm_type=result_usm_type, - sycl_queue=result_sycl_queue) - for i in range(result.size): - result.get_pyobj()[i] = dpnp.bool(input1.get_pyobj()[i] > input2.get_pyobj()[i]) +cpdef utils.dpnp_descriptor dpnp_equal(utils.dpnp_descriptor x1_obj, + utils.dpnp_descriptor x2_obj, + object dtype=None, + utils.dpnp_descriptor out=None, + object where=True): + return call_fptr_2in_1out_strides(DPNP_FN_EQUAL_EXT, x1_obj, x2_obj, dtype, out, where, func_name="equal") - return result +cpdef utils.dpnp_descriptor 
dpnp_greater(utils.dpnp_descriptor x1_obj, + utils.dpnp_descriptor x2_obj, + object dtype=None, + utils.dpnp_descriptor out=None, + object where=True): + return call_fptr_2in_1out_strides(DPNP_FN_GREATER_EXT, x1_obj, x2_obj, dtype, out, where, func_name="greater") -cpdef utils.dpnp_descriptor dpnp_greater_equal(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2): - result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2) - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, - dpnp.bool, - None, - device=result_sycl_device, - usm_type=result_usm_type, - sycl_queue=result_sycl_queue) - for i in range(result.size): - result.get_pyobj()[i] = dpnp.bool(input1.get_pyobj()[i] >= input2.get_pyobj()[i]) - return result +cpdef utils.dpnp_descriptor dpnp_greater_equal(utils.dpnp_descriptor x1_obj, + utils.dpnp_descriptor x2_obj, + object dtype=None, + utils.dpnp_descriptor out=None, + object where=True): + return call_fptr_2in_1out_strides(DPNP_FN_GREATER_EQUAL_EXT, x1_obj, x2_obj, dtype, out, where, func_name="greater_equal") cpdef utils.dpnp_descriptor dpnp_isclose(utils.dpnp_descriptor input1, @@ -272,103 +254,56 @@ cpdef utils.dpnp_descriptor dpnp_isnan(utils.dpnp_descriptor input1): return result -cpdef utils.dpnp_descriptor dpnp_less(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2): - result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2) - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, - dpnp.bool, - None, - device=result_sycl_device, - usm_type=result_usm_type, - sycl_queue=result_sycl_queue) - for i in range(result.size): - result.get_pyobj()[i] = dpnp.bool(input1.get_pyobj()[i] < input2.get_pyobj()[i]) - - return result - - -cpdef utils.dpnp_descriptor dpnp_less_equal(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2): - result_sycl_device, result_usm_type, 
result_sycl_queue = utils.get_common_usm_allocation(input1, input2) - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, - dpnp.bool, - None, - device=result_sycl_device, - usm_type=result_usm_type, - sycl_queue=result_sycl_queue) - for i in range(result.size): - result.get_pyobj()[i] = dpnp.bool(input1.get_pyobj()[i] <= input2.get_pyobj()[i]) - - return result - - -cpdef utils.dpnp_descriptor dpnp_logical_and(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2): - result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2) - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, - dpnp.bool, - None, - device=result_sycl_device, - usm_type=result_usm_type, - sycl_queue=result_sycl_queue) - - for i in range(result.size): - result.get_pyobj()[i] = numpy.logical_and(input1.get_pyobj()[i], input2.get_pyobj()[i]) - - return result - - -cpdef utils.dpnp_descriptor dpnp_logical_not(utils.dpnp_descriptor input1): - input1_obj = input1.get_array() - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, - dpnp.bool, - None, - device=input1_obj.sycl_device, - usm_type=input1_obj.usm_type, - sycl_queue=input1_obj.sycl_queue) - - for i in range(result.size): - result.get_pyobj()[i] = numpy.logical_not(input1.get_pyobj()[i]) +cpdef utils.dpnp_descriptor dpnp_less(utils.dpnp_descriptor x1_obj, + utils.dpnp_descriptor x2_obj, + object dtype=None, + utils.dpnp_descriptor out=None, + object where=True): + return call_fptr_2in_1out_strides(DPNP_FN_LESS_EXT, x1_obj, x2_obj, dtype, out, where, func_name="less") - return result +cpdef utils.dpnp_descriptor dpnp_less_equal(utils.dpnp_descriptor x1_obj, + utils.dpnp_descriptor x2_obj, + object dtype=None, + utils.dpnp_descriptor out=None, + object where=True): + return call_fptr_2in_1out_strides(DPNP_FN_LESS_EQUAL_EXT, x1_obj, x2_obj, dtype, out, where, func_name="less_equal") -cpdef 
utils.dpnp_descriptor dpnp_logical_or(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2): - result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2) - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, - dpnp.bool, - None, - device=result_sycl_device, - usm_type=result_usm_type, - sycl_queue=result_sycl_queue) - for i in range(result.size): - result.get_pyobj()[i] = numpy.logical_or(input1.get_pyobj()[i], input2.get_pyobj()[i]) +cpdef utils.dpnp_descriptor dpnp_logical_and(utils.dpnp_descriptor x1_obj, + utils.dpnp_descriptor x2_obj, + object dtype=None, + utils.dpnp_descriptor out=None, + object where=True): + return call_fptr_2in_1out_strides(DPNP_FN_LOGICAL_AND_EXT, x1_obj, x2_obj, dtype, out, where, func_name="logical_and") - return result +cpdef utils.dpnp_descriptor dpnp_logical_not(utils.dpnp_descriptor x_obj, + object dtype=None, + utils.dpnp_descriptor out=None, + object where=True): + return call_fptr_1in_1out_strides(DPNP_FN_LOGICAL_NOT_EXT, x_obj, dtype, out, where, func_name="logical_not") -cpdef utils.dpnp_descriptor dpnp_logical_xor(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2): - result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2) - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, - dpnp.bool, - None, - device=result_sycl_device, - usm_type=result_usm_type, - sycl_queue=result_sycl_queue) - for i in range(result.size): - result.get_pyobj()[i] = numpy.logical_xor(input1.get_pyobj()[i], input2.get_pyobj()[i]) +cpdef utils.dpnp_descriptor dpnp_logical_or(utils.dpnp_descriptor x1_obj, + utils.dpnp_descriptor x2_obj, + object dtype=None, + utils.dpnp_descriptor out=None, + object where=True): + return call_fptr_2in_1out_strides(DPNP_FN_LOGICAL_OR_EXT, x1_obj, x2_obj, dtype, out, where, func_name="logical_or") - return result +cpdef utils.dpnp_descriptor 
dpnp_logical_xor(utils.dpnp_descriptor x1_obj, + utils.dpnp_descriptor x2_obj, + object dtype=None, + utils.dpnp_descriptor out=None, + object where=True): + return call_fptr_2in_1out_strides(DPNP_FN_LOGICAL_XOR_EXT, x1_obj, x2_obj, dtype, out, where, func_name="logical_xor") -cpdef utils.dpnp_descriptor dpnp_not_equal(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2): - result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2) - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, - dpnp.bool, - None, - device=result_sycl_device, - usm_type=result_usm_type, - sycl_queue=result_sycl_queue) - for i in range(result.size): - result.get_pyobj()[i] = dpnp.bool(input1.get_pyobj()[i] != input2.get_pyobj()[i]) - return result +cpdef utils.dpnp_descriptor dpnp_not_equal(utils.dpnp_descriptor x1_obj, + utils.dpnp_descriptor x2_obj, + object dtype=None, + utils.dpnp_descriptor out=None, + object where=True): + return call_fptr_2in_1out_strides(DPNP_FN_NOT_EQUAL_EXT, x1_obj, x2_obj, dtype, out, where, func_name="not_equal") diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py index 82c271fa7d90..c50ed9792720 100644 --- a/dpnp/dpnp_array.py +++ b/dpnp/dpnp_array.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2022, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -140,7 +140,10 @@ def __bool__(self): return self._array_obj.__bool__() # '__class__', - # '__complex__', + + def __complex__(self): + return self._array_obj.__complex__() + # '__contains__', # '__copy__', # '__deepcopy__', @@ -150,6 +153,12 @@ def __bool__(self): # '__divmod__', # '__doc__', + def __dlpack__(self, stream=None): + return self._array_obj.__dlpack__(stream=stream) + + def __dlpack_device__(self): + return self._array_obj.__dlpack_device__() + def __eq__(self, other): return dpnp.equal(self, other) @@ -187,7 +196,10 @@ def __gt__(self, other): # '__imatmul__', # '__imod__', # '__imul__', - # '__index__', + + def __index__(self): + return self._array_obj.__index__() + # '__init__', # '__init_subclass__', @@ -247,7 +259,10 @@ def __radd__(self, other): # '__rdivmod__', # '__reduce__', # '__reduce_ex__', - # '__repr__', + + def __repr__(self): + return dpt.usm_ndarray_repr(self._array_obj, prefix="array") + # '__rfloordiv__', # '__rlshift__', @@ -264,7 +279,9 @@ def __rmul__(self, other): # '__rpow__', # '__rrshift__', # '__rshift__', - # '__rsub__', + + def __rsub__(self, other): + return dpnp.subtract(other, self) def __rtruediv__(self, other): return dpnp.true_divide(other, self) @@ -292,8 +309,7 @@ def __str__(self): """ - return str(self.asnumpy()) - + return self._array_obj.__str__() def __sub__(self, other): return dpnp.subtract(self, other) @@ -305,6 +321,16 @@ def __truediv__(self, other): # '__xor__', + @staticmethod + def _create_from_usm_ndarray(usm_ary : dpt.usm_ndarray): + if not isinstance(usm_ary, dpt.usm_ndarray): + raise TypeError( + f"Expected dpctl.tensor.usm_ndarray, got {type(usm_ary)}" + ) + res = dpnp_array.__new__(dpnp_array) + res._array_obj = usm_ary + return res + def all(self, axis=None, out=None, keepdims=False): """ Returns True if all elements evaluate to True. 
@@ -493,7 +519,7 @@ def conj(self): """ - if not numpy.issubsctype(self.dtype, numpy.complex): + if not numpy.issubsctype(self.dtype, numpy.complex_): return self else: return dpnp.conjugate(self) @@ -506,7 +532,7 @@ def conjugate(self): """ - if not numpy.issubsctype(self.dtype, numpy.complex): + if not numpy.issubsctype(self.dtype, numpy.complex_): return self else: return dpnp.conjugate(self) diff --git a/dpnp/dpnp_container.py b/dpnp/dpnp_container.py index 93ab716eb59a..75e20f8a0cb6 100644 --- a/dpnp/dpnp_container.py +++ b/dpnp/dpnp_container.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2022, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -45,8 +45,11 @@ "arange", "asarray", "empty", + "eye", "full", "ones" + "tril", + "triu", "zeros", ] @@ -150,6 +153,33 @@ def full(shape, return dpnp_array(array_obj.shape, buffer=array_obj, order=order) +def eye(N, + M=None, + /, + *, + k=0, + dtype=None, + order="C", + device=None, + usm_type="device", + sycl_queue=None): + """Validate input parameters before passing them into `dpctl.tensor` module""" + dpu.validate_usm_type(usm_type, allow_none=False) + sycl_queue_normalized = dpnp.get_normalized_queue_device(sycl_queue=sycl_queue, device=device) + if order is None: + order = 'C' + + """Creates `dpnp_array` with ones on the `k`th diagonal.""" + array_obj = dpt.eye(N, + M, + k=k, + dtype=dtype, + order=order, + usm_type=usm_type, + sycl_queue=sycl_queue_normalized) + return dpnp_array(array_obj.shape, buffer=array_obj, order=order) + + def ones(shape, *, dtype=None, @@ -172,6 +202,18 @@ def ones(shape, return dpnp_array(array_obj.shape, buffer=array_obj, order=order) +def tril(x1, /, *, k=0): + """"Creates `dpnp_array` as lower triangular part of an input array.""" + array_obj = dpt.tril(x1.get_array() if 
isinstance(x1, dpnp_array) else x1, k) + return dpnp_array(array_obj.shape, buffer=array_obj, order="K") + + +def triu(x1, /, *, k=0): + """"Creates `dpnp_array` as upper triangular part of an input array.""" + array_obj = dpt.triu(x1.get_array() if isinstance(x1, dpnp_array) else x1, k) + return dpnp_array(array_obj.shape, buffer=array_obj, order="K") + + def zeros(shape, *, dtype=None, diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py index 4e791ad0eaf9..b7cdef8cc615 100644 --- a/dpnp/dpnp_iface.py +++ b/dpnp/dpnp_iface.py @@ -2,7 +2,7 @@ # distutils: language = c++ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2022, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -61,15 +61,18 @@ "asnumpy", "astype", "convert_single_elem_array_to_scalar", + "default_float_type", "dpnp_queue_initialize", "dpnp_queue_is_cpu", + "from_dlpack", "get_dpnp_descriptor", "get_include", "get_normalized_queue_device" ] from dpnp import ( - isscalar + isscalar, + float64 ) from dpnp.dpnp_iface_arraycreation import * @@ -191,9 +194,64 @@ def convert_single_elem_array_to_scalar(obj, keepdims=False): return obj +def default_float_type(device=None, sycl_queue=None): + """ + Return a floating type used by default in DPNP depending on device capabilities. + + Parameters + ---------- + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where an array of default floating type might be created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + The value ``None`` is interpreted as to use a default device. 
+ sycl_queue : {None, SyclQueue}, optional + A SYCL queue which might be used to create an array of default floating type. + The `sycl_queue` can be ``None`` (the default), which is interpreted as + to get the SYCL queue from `device` keyword if present or to use a default queue. + + Returns + ------- + dt : dtype + A default DPNP floating type. + + """ + + _sycl_queue = get_normalized_queue_device(device=device, sycl_queue=sycl_queue) + return map_dtype_to_device(float64, _sycl_queue.sycl_device) + + +def from_dlpack(obj, /): + """ + Create a dpnp array from a Python object implementing the ``__dlpack__`` + protocol. + + See https://dmlc.github.io/dlpack/latest/ for more details. + + Parameters + ---------- + obj : object + A Python object representing an array that implements the ``__dlpack__`` + and ``__dlpack_device__`` methods. + + Returns + ------- + out : dpnp_array + Returns a new dpnp array containing the data from another array + (obj) with the ``__dlpack__`` method on the same device as object. + + """ + + usm_ary = dpt.from_dlpack(obj) + return dpnp_array._create_from_usm_ndarray(usm_ary) + + def get_dpnp_descriptor(ext_obj, copy_when_strides=True, copy_when_nondefault_queue=True, + alloc_usm_type=None, alloc_queue=None): """ Return True: @@ -214,9 +272,9 @@ def get_dpnp_descriptor(ext_obj, return False # If input object is a scalar, it means it was allocated on host memory. - # We need to copy it to device memory according to compute follows data paradigm. + # We need to copy it to USM memory according to compute follows data paradigm. 
if isscalar(ext_obj): - ext_obj = array(ext_obj, sycl_queue=alloc_queue) + ext_obj = array(ext_obj, usm_type=alloc_usm_type, sycl_queue=alloc_queue) # while dpnp functions have no implementation with strides support # we need to create a non-strided copy diff --git a/dpnp/dpnp_iface_arraycreation.py b/dpnp/dpnp_iface_arraycreation.py index 5fb4d8c7a4da..5b062a346b97 100644 --- a/dpnp/dpnp_iface_arraycreation.py +++ b/dpnp/dpnp_iface_arraycreation.py @@ -2,7 +2,7 @@ # distutils: language = c++ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2022, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -42,12 +42,14 @@ import numpy import dpnp +import operator import dpnp.config as config from dpnp.dpnp_algo import * from dpnp.dpnp_utils import * import dpnp.dpnp_container as dpnp_container +import dpctl.tensor as dpt __all__ = [ @@ -530,7 +532,7 @@ def empty_like(x1, Limitations ----------- - Parameters ``x1`` is supported only as :class:`dpnp.dpnp_array`. + Parameter ``x1`` is supported as :class:`dpnp.dpnp_array` or :class:`dpctl.tensor.usm_ndarray` Parameter ``order`` is supported with values ``"C"`` or ``"F"``. Parameter ``subok`` is supported only with default value ``False``. Otherwise the function will be executed sequentially on CPU. 
@@ -552,7 +554,7 @@ def empty_like(x1, """ - if not isinstance(x1, dpnp.ndarray): + if not isinstance(x1, (dpnp.ndarray, dpt.usm_ndarray)): pass elif order not in ('C', 'c', 'F', 'f', None): pass @@ -572,31 +574,43 @@ def empty_like(x1, return call_origin(numpy.empty_like, x1, dtype, order, subok, shape) -def eye(N, M=None, k=0, dtype=None, order='C', **kwargs): +def eye(N, + M=None, + /, + *, + k=0, + dtype=None, + order="C", + like=None, + device=None, + usm_type="device", + sycl_queue=None): """ Return a 2-D array with ones on the diagonal and zeros elsewhere. For full documentation refer to :obj:`numpy.eye`. Limitations ----------- - Input array is supported as :obj:`dpnp.ndarray`. - Parameters ``order`` is supported only with default value. + Parameter ``order`` is supported only with values ``"C"`` and ``"F"``. + Parameter ``like`` is supported only with default value ``None``. + Otherwise the function will be executed sequentially on CPU. + """ - if (not use_origin_backend()): - if not isinstance(N, (int, dpnp.int, dpnp.int32, dpnp.int64)): - pass - elif M is not None and not isinstance(M, (int, dpnp.int, dpnp.int32, dpnp.int64)): - pass - elif not isinstance(k, (int, dpnp.int, dpnp.int32, dpnp.int64)): - pass - elif order != 'C': - pass - elif len(kwargs) != 0: - pass - else: - return dpnp_eye(N, M=M, k=k, dtype=dtype).get_pyobj() + if order not in ('C', 'c', 'F', 'f', None): + pass + elif like is not None: + pass + else: + return dpnp_container.eye(N, + M, + k=k, + dtype=dtype, + order=order, + device=device, + usm_type=usm_type, + sycl_queue=sycl_queue) - return call_origin(numpy.eye, N, M=M, k=k, dtype=dtype, order=order, **kwargs) + return call_origin(numpy.eye, N, M, k=k, dtype=dtype, order=order, like=None) def frombuffer(buffer, **kwargs): @@ -750,7 +764,7 @@ def full_like(x1, Limitations ----------- - Parameters ``x1`` is supported only as :class:`dpnp.dpnp_array`. 
+ Parameter ``x1`` is supported as :class:`dpnp.dpnp_array` or :class:`dpctl.tensor.usm_ndarray` Parameter ``order`` is supported only with values ``"C"`` and ``"F"``. Parameter ``subok`` is supported only with default value ``False``. Otherwise the function will be executed sequentially on CPU. @@ -771,7 +785,7 @@ def full_like(x1, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0] """ - if not isinstance(x1, dpnp.ndarray): + if not isinstance(x1, (dpnp.ndarray, dpt.usm_ndarray)): pass elif order not in ('C', 'c', 'F', 'f', None): pass @@ -859,10 +873,8 @@ def identity(n, dtype=None, *, like=None): elif n < 0: pass else: - if dtype is None: - sycl_queue = dpnp.get_normalized_queue_device(sycl_queue=None, device=None) - dtype = map_dtype_to_device(dpnp.float64, sycl_queue.sycl_device) - return dpnp_identity(n, dtype).get_pyobj() + _dtype = dpnp.default_float_type() if dtype is None else dtype + return dpnp_identity(n, _dtype).get_pyobj() return call_origin(numpy.identity, n, dtype=dtype, like=like) @@ -1179,7 +1191,7 @@ def ones_like(x1, Limitations ----------- - Parameters ``x1`` is supported only as :class:`dpnp.dpnp_array`. + Parameter ``x1`` is supported as :class:`dpnp.dpnp_array` or :class:`dpctl.tensor.usm_ndarray` Parameter ``order`` is supported with values ``"C"`` or ``"F"``. Parameter ``subok`` is supported only with default value ``False``. Otherwise the function will be executed sequentially on CPU. @@ -1201,7 +1213,7 @@ def ones_like(x1, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0] """ - if not isinstance(x1, dpnp.ndarray): + if not isinstance(x1, (dpnp.ndarray, dpt.usm_ndarray)): pass elif order not in ('C', 'c', 'F', 'f', None): pass @@ -1280,7 +1292,7 @@ def trace(x1, offset=0, axis1=0, axis2=1, dtype=None, out=None): return call_origin(numpy.trace, x1, offset, axis1, axis2, dtype, out) -def tri(N, M=None, k=0, dtype=numpy.float, **kwargs): +def tri(N, M=None, k=0, dtype=dpnp.float, **kwargs): """ An array with ones at and below the given diagonal and zeros elsewhere. 
@@ -1315,15 +1327,13 @@ def tri(N, M=None, k=0, dtype=numpy.float, **kwargs): elif not isinstance(k, int): pass else: - if dtype is numpy.float: - sycl_queue = dpnp.get_normalized_queue_device(sycl_queue=None, device=None) - dtype = map_dtype_to_device(dpnp.float64, sycl_queue.sycl_device) - return dpnp_tri(N, M, k, dtype).get_pyobj() + _dtype = dpnp.default_float_type() if dtype in (dpnp.float, None) else dtype + return dpnp_tri(N, M, k, _dtype).get_pyobj() return call_origin(numpy.tri, N, M, k, dtype, **kwargs) -def tril(x1, k=0): +def tril(x1, /, *, k=0): """ Lower triangle of an array. @@ -1331,6 +1341,12 @@ def tril(x1, k=0): For full documentation refer to :obj:`numpy.tril`. + Limitations + ----------- + Parameter `x1` is supported as :class:`dpnp.dpnp_array` or :class:`dpctl.tensor.usm_ndarray` with two or more dimensions. + Parameter `k` is supported only of integer data type. + Otherwise the function will be executed sequentially on CPU. + Examples -------- >>> import dpnp as np @@ -1342,17 +1358,25 @@ def tril(x1, k=0): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) - if x1_desc: - if not isinstance(k, int): - pass - else: - return dpnp_tril(x1_desc, k).get_pyobj() + _k = None + try: + _k = operator.index(k) + except TypeError: + pass + + if not isinstance(x1, (dpnp.ndarray, dpt.usm_ndarray)): + pass + elif x1.ndim < 2: + pass + elif _k is None: + pass + else: + return dpnp_container.tril(x1, k=_k) return call_origin(numpy.tril, x1, k) -def triu(x1, k=0): +def triu(x1, /, *, k=0): """ Upper triangle of an array. @@ -1361,6 +1385,12 @@ def triu(x1, k=0): For full documentation refer to :obj:`numpy.triu`. + Limitations + ----------- + Parameter `x1` is supported as :class:`dpnp.dpnp_array` or :class:`dpctl.tensor.usm_ndarray` with two or more dimensions. + Parameter `k` is supported only of integer data type. + Otherwise the function will be executed sequentially on CPU. 
+ Examples -------- >>> import dpnp as np @@ -1372,12 +1402,20 @@ def triu(x1, k=0): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) - if x1_desc: - if not isinstance(k, int): - pass - else: - return dpnp_triu(x1_desc, k).get_pyobj() + _k = None + try: + _k = operator.index(k) + except TypeError: + pass + + if not isinstance(x1, (dpnp.ndarray, dpt.usm_ndarray)): + pass + elif x1.ndim < 2: + pass + elif _k is None: + pass + else: + return dpnp_container.triu(x1, k=_k) return call_origin(numpy.triu, x1, k) @@ -1494,7 +1532,7 @@ def zeros_like(x1, Limitations ----------- - Parameters ``x1`` is supported only as :class:`dpnp.dpnp_array`. + Parameter ``x1`` is supported as :class:`dpnp.dpnp_array` or :class:`dpctl.tensor.usm_ndarray` Parameter ``order`` is supported with values ``"C"`` or ``"F"``. Parameter ``subok`` is supported only with default value ``False``. Otherwise the function will be executed sequentially on CPU. @@ -1515,8 +1553,8 @@ def zeros_like(x1, >>> [i for i in np.zeros_like(x)] [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] -""" - if not isinstance(x1, dpnp.ndarray): + """ + if not isinstance(x1, (dpnp.ndarray, dpt.usm_ndarray)): pass elif order not in ('C', 'c', 'F', 'f', None): pass diff --git a/dpnp/dpnp_iface_logic.py b/dpnp/dpnp_iface_logic.py index 0f1e1b5fc0e5..e94b0f6c1efb 100644 --- a/dpnp/dpnp_iface_logic.py +++ b/dpnp/dpnp_iface_logic.py @@ -2,7 +2,7 @@ # distutils: language = c++ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -219,18 +219,32 @@ def any(x1, axis=None, out=None, keepdims=False): return call_origin(numpy.any, x1, axis, out, keepdims) -def equal(x1, x2): +def equal(x1, + x2, + /, + out=None, + *, + where=True, + dtype=None, + subok=True): """ - Return (x1 == x2) element-wise. + Return the truth value of (x1 == x2) element-wise. For full documentation refer to :obj:`numpy.equal`. + Returns + ------- + out : dpnp.ndarray + Output array of bool type, element-wise comparison of `x1` and `x2`. + Limitations ----------- - Parameter ``x1`` is supported as :obj:`dpnp.ndarray`. - Parameter ``x2`` is supported as either :obj:`dpnp.ndarray` or int. - Input array data types are limited by supported DPNP :ref:`Data types`. - Sizes, shapes and data types of input arrays ``x1`` and ``x2`` are supported to be equal. + Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar, + but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`). + Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. + Otherwise the function will be executed sequentially on CPU. + Input array data types are limited by supported DPNP :ref:`Data types`, + excluding `dpnp.complex64` and `dpnp.complex128`. 
See Also -------- @@ -250,33 +264,57 @@ def equal(x1, x2): [True, True, False] """ - - # x1_desc = dpnp.get_dpnp_descriptor(x1) - # x2_desc = dpnp.get_dpnp_descriptor(x2) - # if x1_desc and x2_desc: - # if x1_desc.size != x2_desc.size: - # pass - # elif x1_desc.dtype != x2_desc.dtype: - # pass - # elif x1_desc.shape != x2_desc.shape: - # pass - # else: - # return dpnp_equal(x1_desc, x2_desc).get_pyobj() - + + if out is not None: + pass + elif where is not True: + pass + elif dtype is not None: + pass + elif subok is not True: + pass + elif dpnp.isscalar(x1) and dpnp.isscalar(x2): + # at least either x1 or x2 has to be an array + pass + else: + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) + + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + if x1_desc and x2_desc: + return dpnp_equal(x1_desc, x2_desc).get_pyobj() return call_origin(numpy.equal, x1, x2) -def greater(x1, x2): +def greater(x1, + x2, + /, + out=None, + *, + where=True, + dtype=None, + subok=True): """ - Return (x1 > x2) element-wise. + Return the truth value of (x1 > x2) element-wise. For full documentation refer to :obj:`numpy.greater`. + Returns + ------- + out : dpnp.ndarray + Output array of bool type, element-wise comparison of `x1` and `x2`. + Limitations ----------- - At least either ``x1`` or ``x2`` should be as :obj:`dpnp.ndarray`. + Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar, + but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`). + Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. 
Otherwise the function will be executed sequentially on CPU. - Input array data types are limited by supported DPNP :ref:`Data types`. + Input array data types are limited by supported DPNP :ref:`Data types`, + excluding `dpnp.complex64` and `dpnp.complex128`. See Also -------- @@ -297,30 +335,56 @@ def greater(x1, x2): """ - # x1_desc = dpnp.get_dpnp_descriptor(x1) - # x2_desc = dpnp.get_dpnp_descriptor(x2) - # if x1_desc and x2_desc: - # if x1_desc.size < 2: - # pass - # elif x2_desc.size < 2: - # pass - # else: - # return dpnp_greater(x1_desc, x2_desc).get_pyobj() - + if out is not None: + pass + elif where is not True: + pass + elif dtype is not None: + pass + elif subok is not True: + pass + elif dpnp.isscalar(x1) and dpnp.isscalar(x2): + # at least either x1 or x2 has to be an array + pass + else: + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) + + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + if x1_desc and x2_desc: + return dpnp_greater(x1_desc, x2_desc).get_pyobj() return call_origin(numpy.greater, x1, x2) -def greater_equal(x1, x2): +def greater_equal(x1, + x2, + /, + out=None, + *, + where=True, + dtype=None, + subok=True): """ - Return (x1 >= x2) element-wise. + Return the truth value of (x1 >= x2) element-wise. For full documentation refer to :obj:`numpy.greater_equal`. + Returns + ------- + out : dpnp.ndarray + Output array of bool type, element-wise comparison of `x1` and `x2`. + Limitations ----------- - At least either ``x1`` or ``x2`` should be as :obj:`dpnp.ndarray`. 
+ Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar, + but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`). + Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. Otherwise the function will be executed sequentially on CPU. - Input array data types are limited by supported DPNP :ref:`Data types`. + Input array data types are limited by supported DPNP :ref:`Data types`, + excluding `dpnp.complex64` and `dpnp.complex128`. See Also -------- @@ -341,16 +405,27 @@ def greater_equal(x1, x2): """ - # x1_desc = dpnp.get_dpnp_descriptor(x1) - # x2_desc = dpnp.get_dpnp_descriptor(x2) - # if x1_desc and x2_desc: - # if x1_desc.size < 2: - # pass - # elif x2_desc.size < 2: - # pass - # else: - # return dpnp_greater_equal(x1_desc, x2_desc).get_pyobj() - + if out is not None: + pass + elif where is not True: + pass + elif dtype is not None: + pass + elif subok is not True: + pass + elif dpnp.isscalar(x1) and dpnp.isscalar(x2): + # at least either x1 or x2 has to be an array + pass + else: + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) + + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + if x1_desc and x2_desc: + return dpnp_greater_equal(x1_desc, x2_desc).get_pyobj() return call_origin(numpy.greater_equal, x1, x2) @@ -532,17 +607,32 @@ def isnan(x1, out=None, **kwargs): return call_origin(numpy.isnan, x1, out, **kwargs) -def less(x1, x2): +def less(x1, + x2, + /, + out=None, + *, + where=True, + dtype=None, + subok=True): """ - Return (x1 < x2) element-wise. 
+ Return the truth value of (x1 < x2) element-wise. For full documentation refer to :obj:`numpy.less`. + Returns + ------- + out : dpnp.ndarray + Output array of bool type, element-wise comparison of `x1` and `x2`. + Limitations ----------- - At least either ``x1`` or ``x2`` should be as :obj:`dpnp.ndarray`. + Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar, + but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`). + Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. Otherwise the function will be executed sequentially on CPU. - Input array data types are limited by supported DPNP :ref:`Data types`. + Input array data types are limited by supported DPNP :ref:`Data types`, + excluding `dpnp.complex64` and `dpnp.complex128`. See Also -------- @@ -563,30 +653,56 @@ def less(x1, x2): """ - # x1_desc = dpnp.get_dpnp_descriptor(x1) - # x2_desc = dpnp.get_dpnp_descriptor(x2) - # if x1_desc and x2_desc: - # if x1_desc.size < 2: - # pass - # elif x2_desc.size < 2: - # pass - # else: - # return dpnp_less(x1_desc, x2_desc).get_pyobj() - + if out is not None: + pass + elif where is not True: + pass + elif dtype is not None: + pass + elif subok is not True: + pass + elif dpnp.isscalar(x1) and dpnp.isscalar(x2): + # at least either x1 or x2 has to be an array + pass + else: + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) + + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + if x1_desc and x2_desc: + return dpnp_less(x1_desc, x2_desc).get_pyobj() return call_origin(numpy.less, x1, x2) -def less_equal(x1, 
x2): +def less_equal(x1, + x2, + /, + out=None, + *, + where=True, + dtype=None, + subok=True): """ - Return (x1 <= x2) element-wise. + Return the truth value of (x1 <= x2) element-wise. For full documentation refer to :obj:`numpy.less_equal`. + Returns + ------- + out : dpnp.ndarray + Output array of bool type, element-wise comparison of `x1` and `x2`. + Limitations ----------- - At least either ``x1`` or ``x2`` should be as :obj:`dpnp.ndarray`. + Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar, + but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`). + Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. Otherwise the function will be executed sequentially on CPU. - Input array data types are limited by supported DPNP :ref:`Data types`. + Input array data types are limited by supported DPNP :ref:`Data types`, + excluding `dpnp.complex64` and `dpnp.complex128`. See Also -------- @@ -607,32 +723,56 @@ def less_equal(x1, x2): """ - # x1_desc = dpnp.get_dpnp_descriptor(x1) - # x2_desc = dpnp.get_dpnp_descriptor(x2) - # if x1_desc and x2_desc: - # if x1_desc.size < 2: - # pass - # elif x2_desc.size < 2: - # pass - # else: - # return dpnp_less_equal(x1_desc, x2_desc).get_pyobj() - + if out is not None: + pass + elif where is not True: + pass + elif dtype is not None: + pass + elif subok is not True: + pass + elif dpnp.isscalar(x1) and dpnp.isscalar(x2): + # at least either x1 or x2 has to be an array + pass + else: + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) + + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, 
alloc_queue=queue) + if x1_desc and x2_desc: + return dpnp_less_equal(x1_desc, x2_desc).get_pyobj() return call_origin(numpy.less_equal, x1, x2) -def logical_and(x1, x2, out=None, **kwargs): +def logical_and(x1, + x2, + /, + out=None, + *, + where=True, + dtype=None, + subok=True): """ Compute the truth value of x1 AND x2 element-wise. For full documentation refer to :obj:`numpy.logical_and`. + Returns + ------- + out : dpnp.ndarray + Output array of bool type, element-wise logical comparison of `x1` and `x2`. + Limitations ----------- - Input arrays are supported as :obj:`dpnp.ndarray`. + Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar, + but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`). + Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. Otherwise the function will be executed sequentially on CPU. - Input array data types are limited by supported DPNP :ref:`Data types`. - Parameter ``out`` is supported only with default value ``None``. - Parameter ``where`` is supported only with default value ``True``. + Input array data types are limited by supported DPNP :ref:`Data types`, + excluding `dpnp.complex64` and `dpnp.complex128`. 
See Also -------- @@ -652,30 +792,55 @@ def logical_and(x1, x2, out=None, **kwargs): """ - # x1_desc = dpnp.get_dpnp_descriptor(x1) - # x2_desc = dpnp.get_dpnp_descriptor(x2) - # if x1_desc and x2_desc and not kwargs: - # if out is not None: - # pass - # else: - # return dpnp_logical_and(x1_desc, x2_desc).get_pyobj() - - return call_origin(numpy.logical_and, x1, x2, out, **kwargs) - - -def logical_not(x1, out=None, **kwargs): + if out is not None: + pass + elif where is not True: + pass + elif dtype is not None: + pass + elif subok is not True: + pass + elif dpnp.isscalar(x1) and dpnp.isscalar(x2): + # at least either x1 or x2 has to be an array + pass + else: + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) + + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + if x1_desc and x2_desc: + return dpnp_logical_and(x1_desc, x2_desc).get_pyobj() + return call_origin(numpy.logical_and, x1, x2) + + +def logical_not(x, + /, + out=None, + *, + where=True, + dtype=None, + subok=True): """ Compute the truth value of NOT x element-wise. For full documentation refer to :obj:`numpy.logical_not`. + Returns + ------- + out : dpnp.ndarray + Boolean result with the same shape as `x` of the NOT operation + on elements of `x`. + Limitations ----------- - Input array is supported as :obj:`dpnp.ndarray`. + Parameters `x` is only supported as :class:`dpnp.ndarray`. + Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. Otherwise the function will be executed sequentially on CPU. - Input array data types are limited by supported DPNP :ref:`Data types`. 
- Parameter ``out`` is supported only with default value ``None``. - Parameter ``where`` is supported only with default value ``True``. + Input array data type is limited by supported DPNP :ref:`Data types`, + excluding `dpnp.complex64` and `dpnp.complex128`. See Also -------- @@ -693,29 +858,47 @@ def logical_not(x1, out=None, **kwargs): """ - # x1_desc = dpnp.get_dpnp_descriptor(x1) - # if x1_desc and not kwargs: - # if out is not None: - # pass - # else: - # return dpnp_logical_not(x1_desc).get_pyobj() - - return call_origin(numpy.logical_not, x1, out, **kwargs) - - -def logical_or(x1, x2, out=None, **kwargs): + if out is not None: + pass + elif where is not True: + pass + elif dtype is not None: + pass + elif subok is not True: + pass + else: + x1_desc = dpnp.get_dpnp_descriptor(x, copy_when_strides=False, copy_when_nondefault_queue=False) + if x1_desc: + return dpnp_logical_not(x1_desc).get_pyobj() + return call_origin(numpy.logical_not, x) + + +def logical_or(x1, + x2, + /, + out=None, + *, + where=True, + dtype=None, + subok=True): """ Compute the truth value of x1 OR x2 element-wise. For full documentation refer to :obj:`numpy.logical_or`. + Returns + ------- + out : dpnp.ndarray + Output array of bool type, element-wise logical comparison of `x1` and `x2`. + Limitations ----------- - Input arrays are supported as :obj:`dpnp.ndarray`. + Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar, + but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`). + Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. Otherwise the function will be executed sequentially on CPU. - Input array data types are limited by supported DPNP :ref:`Data types`. - Parameter ``out`` is supported only with default value ``None``. - Parameter ``where`` is supported only with default value ``True``. 
+ Input array data types are limited by supported DPNP :ref:`Data types`, + excluding `dpnp.complex64` and `dpnp.complex128`. See Also -------- @@ -735,30 +918,56 @@ def logical_or(x1, x2, out=None, **kwargs): """ - # x1_desc = dpnp.get_dpnp_descriptor(x1) - # x2_desc = dpnp.get_dpnp_descriptor(x2) - # if x1_desc and x2_desc and not kwargs: - # if out is not None: - # pass - # else: - # return dpnp_logical_or(x1_desc, x2_desc).get_pyobj() - - return call_origin(numpy.logical_or, x1, x2, out, **kwargs) - - -def logical_xor(x1, x2, out=None, **kwargs): + if out is not None: + pass + elif where is not True: + pass + elif dtype is not None: + pass + elif subok is not True: + pass + elif dpnp.isscalar(x1) and dpnp.isscalar(x2): + # at least either x1 or x2 has to be an array + pass + else: + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) + + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + if x1_desc and x2_desc: + return dpnp_logical_or(x1_desc, x2_desc).get_pyobj() + return call_origin(numpy.logical_or, x1, x2) + + +def logical_xor(x1, + x2, + /, + out=None, + *, + where=True, + dtype=None, + subok=True): """ - Compute the truth value of x1 XOR x2, element-wise. + Compute the truth value of x1 XOR x2 element-wise. For full documentation refer to :obj:`numpy.logical_xor`. + Returns + ------- + out : dpnp.ndarray + Output array of bool type, element-wise logical comparison of `x1` and `x2`. + Limitations ----------- - Input arrays are supported as :obj:`dpnp.ndarray`. 
+ Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar, + but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`). + Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. Otherwise the function will be executed sequentially on CPU. - Input array data types are limited by supported DPNP :ref:`Data types`. - Parameter ``out`` is supported only with default value ``None``. - Parameter ``where`` is supported only with default value ``True``. + Input array data types are limited by supported DPNP :ref:`Data types`, + excluding `dpnp.complex64` and `dpnp.complex128`. See Also -------- @@ -778,29 +987,56 @@ def logical_xor(x1, x2, out=None, **kwargs): """ - # x1_desc = dpnp.get_dpnp_descriptor(x1) - # x2_desc = dpnp.get_dpnp_descriptor(x2) - # if x1_desc and x2_desc and not kwargs: - # if out is not None: - # pass - # else: - # return dpnp_logical_xor(x1_desc, x2_desc).get_pyobj() - - return call_origin(numpy.logical_xor, x1, x2, out, **kwargs) - - -def not_equal(x1, x2): + if out is not None: + pass + elif where is not True: + pass + elif dtype is not None: + pass + elif subok is not True: + pass + elif dpnp.isscalar(x1) and dpnp.isscalar(x2): + # at least either x1 or x2 has to be an array + pass + else: + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) + + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + if x1_desc and x2_desc: + return dpnp_logical_xor(x1_desc, x2_desc).get_pyobj() + return call_origin(numpy.logical_xor, x1, x2) + + +def not_equal(x1, + x2, + /, + out=None, + *, + where=True, + 
dtype=None, + subok=True): """ - Return (x1 != x2) element-wise. + Return the truth value of (x1 != x2) element-wise. For full documentation refer to :obj:`numpy.not_equal`. + Returns + ------- + out : dpnp.ndarray + Output array of bool type, element-wise comparison of `x1` and `x2`. + Limitations ----------- - At least either ``x1`` or ``x2`` should be as :obj:`dpnp.ndarray`. - If either ``x1`` or ``x2`` is scalar then other one should be :obj:`dpnp.ndarray`. + Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar, + but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`). + Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. Otherwise the function will be executed sequentially on CPU. - Input array data types are limited by supported DPNP :ref:`Data types`. + Input array data types are limited by supported DPNP :ref:`Data types`, + excluding `dpnp.complex64` and `dpnp.complex128`. See Also -------- @@ -821,16 +1057,25 @@ def not_equal(x1, x2): """ - # x1_desc = dpnp.get_dpnp_descriptor(x1) - # x2_desc = dpnp.get_dpnp_descriptor(x2) - # if x1_desc and x2_desc: - # if x1_desc.size < 2: - # pass - # elif x2_desc.size < 2: - # pass - # else: - # result = dpnp_not_equal(x1_desc, x2_desc).get_pyobj() - - # return result - + if out is not None: + pass + elif where is not True: + pass + elif dtype is not None: + pass + elif subok is not True: + pass + elif dpnp.isscalar(x1) and dpnp.isscalar(x2): + # at least either x1 or x2 has to be an array + pass + else: + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) + + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, 
copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + if x1_desc and x2_desc: + return dpnp_not_equal(x1_desc, x2_desc).get_pyobj() return call_origin(numpy.not_equal, x1, x2) diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index 26b81a67dd95..feff53288cfd 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -2,7 +2,7 @@ # distutils: language = c++ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2022, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -154,58 +154,68 @@ def absolute(x1, **kwargs): return call_origin(numpy.absolute, x1, **kwargs) -def add(x1, x2, dtype=None, out=None, where=True, **kwargs): +def add(x1, + x2, + /, + out=None, + *, + where=True, + dtype=None, + subok=True, + **kwargs): """ Add arguments element-wise. For full documentation refer to :obj:`numpy.add`. + Returns + ------- + y : dpnp.ndarray + The sum of `x1` and `x2`, element-wise. + Limitations ----------- - Parameters ``x1`` and ``x2`` are supported as either :obj:`dpnp.ndarray` or scalar. - Parameters ``dtype``, ``out`` and ``where`` are supported with their default values. + Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar, + but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`). + Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. Keyword arguments ``kwargs`` are currently unsupported. - Otherwise the functions will be executed sequentially on CPU. + Otherwise the function will be executed sequentially on CPU. Input array data types are limited by supported DPNP :ref:`Data types`. 
Examples -------- - >>> import dpnp as np - >>> a = np.array([1, 2, 3]) - >>> b = np.array([1, 2, 3]) - >>> result = np.add(a, b) - >>> [x for x in result] + >>> import dpnp as dp + >>> a = dp.array([1, 2, 3]) + >>> b = dp.array([1, 2, 3]) + >>> result = dp.add(a, b) + >>> print(result) [2, 4, 6] """ - x1_is_scalar = dpnp.isscalar(x1) - x2_is_scalar = dpnp.isscalar(x2) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False) + if out is not None: + pass + elif where is not True: + pass + elif dtype is not None: + pass + elif subok is not True: + pass + elif dpnp.isscalar(x1) and dpnp.isscalar(x2): + # at least either x1 or x2 has to be an array + pass + else: + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - if x1_desc and x2_desc and not kwargs: - if not x1_desc and not x1_is_scalar: - pass - elif not x2_desc and not x2_is_scalar: - pass - elif x1_is_scalar and x2_is_scalar: - pass - elif x1_desc and x1_desc.ndim == 0: - pass - elif x2_desc and x2_desc.ndim == 0: - pass - elif dtype is not None: - pass - elif out is not None: - pass - elif not where: - pass - else: - out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None - return dpnp_add(x1_desc, x2_desc, dtype, out_desc, where).get_pyobj() + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + if x1_desc and x2_desc: + return dpnp_add(x1_desc, x2_desc, dtype=dtype, out=out, where=where).get_pyobj() - return call_origin(numpy.add, x1, x2, 
dtype=dtype, out=out, where=where, **kwargs) + return call_origin(numpy.add, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs) def around(x1, decimals=0, out=None): @@ -534,55 +544,66 @@ def diff(x1, n=1, axis=-1, prepend=numpy._NoValue, append=numpy._NoValue): return call_origin(numpy.diff, x1, n=n, axis=axis, prepend=prepend, append=append) -def divide(x1, x2, dtype=None, out=None, where=True, **kwargs): +def divide(x1, + x2, + /, + out=None, + *, + where=True, + dtype=None, + subok=True, + **kwargs): """ Divide arguments element-wise. For full documentation refer to :obj:`numpy.divide`. + Returns + ------- + y : dpnp.ndarray + The quotient ``x1/x2``, element-wise. + Limitations ----------- - Parameters ``x1`` and ``x2`` are supported as either :obj:`dpnp.ndarray` or scalar. - Parameters ``dtype``, ``out`` and ``where`` are supported with their default values. + Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar, + but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`). + Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. Keyword arguments ``kwargs`` are currently unsupported. - Otherwise the functions will be executed sequentially on CPU. + Otherwise the function will be executed sequentially on CPU. Input array data types are limited by supported DPNP :ref:`Data types`. 
Examples -------- - >>> import dpnp as np - >>> result = np.divide(np.array([1, -2, 6, -9]), np.array([-2, -2, -2, -2])) - >>> [x for x in result] + >>> import dpnp as dp + >>> result = dp.divide(dp.array([1, -2, 6, -9]), dp.array([-2, -2, -2, -2])) + >>> print(result) [-0.5, 1.0, -3.0, 4.5] """ - x1_is_scalar = dpnp.isscalar(x1) - x2_is_scalar = dpnp.isscalar(x2) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False) + if out is not None: + pass + elif where is not True: + pass + elif dtype is not None: + pass + elif subok is not True: + pass + elif dpnp.isscalar(x1) and dpnp.isscalar(x2): + # at least either x1 or x2 has to be an array + pass + else: + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - if x1_desc and x2_desc and not kwargs: - if not x1_desc and not x1_is_scalar: - pass - elif not x2_desc and not x2_is_scalar: - pass - elif x1_is_scalar and x2_is_scalar: - pass - elif x1_desc and x1_desc.ndim == 0: - pass - elif x2_desc and x2_desc.ndim == 0: - pass - elif dtype is not None: - pass - elif out is not None: - pass - elif not where: - pass - else: + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + if x1_desc and x2_desc: return dpnp_divide(x1_desc, x2_desc, dtype=dtype, out=out, where=where).get_pyobj() - return call_origin(numpy.divide, x1, x2, dtype=dtype, out=out, where=where, **kwargs) + return call_origin(numpy.divide, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs) def ediff1d(x1, to_end=None, 
to_begin=None): @@ -1093,11 +1114,11 @@ def multiply(x1, ------- y : {dpnp.ndarray, scalar} The product of `x1` and `x2`, element-wise. - The result is a scalar if both x1 and x2 are scalars. Limitations ----------- - Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar. + Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar, + but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`). Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. Keyword arguments ``kwargs`` are currently unsupported. Otherwise the functions will be executed sequentially on CPU. @@ -1122,18 +1143,20 @@ def multiply(x1, elif subok is not True: pass elif dpnp.isscalar(x1) and dpnp.isscalar(x2): - # keep the result in host memory, if both inputs are scalars - return x1 * x2 + # at least either x1 or x2 has to be an array + pass else: - # get a common queue to copy data from the host into a device if any input is scalar - queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) if x1_desc and x2_desc: return dpnp_multiply(x1_desc, x2_desc, dtype=dtype, out=out, where=where).get_pyobj() - return call_origin(numpy.multiply, x1, x2, 
dtype=dtype, out=out, where=where, **kwargs) + return call_origin(numpy.multiply, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs) def nancumprod(x1, **kwargs): @@ -1508,60 +1531,69 @@ def sign(x1, **kwargs): return call_origin(numpy.sign, x1, **kwargs) -def subtract(x1, x2, dtype=None, out=None, where=True, **kwargs): +def subtract(x1, + x2, + /, + out=None, + *, + where=True, + dtype=None, + subok=True, + **kwargs): """ Subtract arguments, element-wise. For full documentation refer to :obj:`numpy.subtract`. + Returns + ------- + y : dpnp.ndarray + The difference of `x1` and `x2`, element-wise. + Limitations ----------- - Parameters ``x1`` and ``x2`` are supported as either :obj:`dpnp.ndarray` or scalar. - Parameters ``dtype``, ``out`` and ``where`` are supported with their default values. + Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar, + but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`). + Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. Keyword arguments ``kwargs`` are currently unsupported. - Otherwise the functions will be executed sequentially on CPU. + Otherwise the function will be executed sequentially on CPU. Input array data types are limited by supported DPNP :ref:`Data types`. 
Example ------- - >>> import dpnp as np - >>> result = np.subtract(np.array([4, 3]), np.array([2, 7])) - >>> [x for x in result] + >>> import dpnp as dp + >>> result = dp.subtract(dp.array([4, 3]), dp.array([2, 7])) + >>> print(result) [2, -4] """ - x1_is_scalar = dpnp.isscalar(x1) - x2_is_scalar = dpnp.isscalar(x2) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False) + if out is not None: + pass + elif where is not True: + pass + elif dtype is not None: + pass + elif subok is not True: + pass + elif dpnp.isscalar(x1) and dpnp.isscalar(x2): + # at least either x1 or x2 has to be an array + pass + else: + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - if x1_desc and x2_desc and not kwargs: - if not x1_desc and not x1_is_scalar: - pass - elif not x2_desc and not x2_is_scalar: - pass - elif x1_is_scalar and x2_is_scalar: - pass - elif x1_desc and x1_desc.ndim == 0: - pass - elif x1_desc and x1_desc.dtype == numpy.bool: - pass - elif x2_desc and x2_desc.ndim == 0: - pass - elif x2_desc and x2_desc.dtype == numpy.bool: - pass - elif dtype is not None: - pass - elif out is not None: - pass - elif not where: - pass - else: - out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None - return dpnp_subtract(x1_desc, x2_desc, dtype=dtype, out=out_desc, where=where).get_pyobj() + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + if x1_desc and x2_desc: + if x1_desc.dtype == x2_desc.dtype == dpnp.bool: + 
raise TypeError("DPNP boolean subtract, the `-` operator, is not supported, " + "use the bitwise_xor, the `^` operator, or the logical_xor function instead.") + return dpnp_subtract(x1_desc, x2_desc, dtype=dtype, out=out, where=where).get_pyobj() - return call_origin(numpy.subtract, x1, x2, dtype=dtype, out=out, where=where, **kwargs) + return call_origin(numpy.subtract, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs) def sum(x1, axis=None, dtype=None, out=None, keepdims=False, initial=None, where=True): diff --git a/dpnp/dpnp_iface_statistics.py b/dpnp/dpnp_iface_statistics.py index 27eaf4a115f5..ab92f8cc6251 100644 --- a/dpnp/dpnp_iface_statistics.py +++ b/dpnp/dpnp_iface_statistics.py @@ -2,7 +2,7 @@ # distutils: language = c++ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -299,7 +299,7 @@ def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights= return call_origin(numpy.cov, x1, y, rowvar, bias, ddof, fweights, aweights) -def histogram(a, bins=10, range=None, normed=None, weights=None, density=None): +def histogram(a, bins=10, range=None, density=None, weights=None): """ Compute the histogram of a dataset. For full documentation refer to :obj:`numpy.histogram`. 
@@ -323,7 +323,7 @@ def histogram(a, bins=10, range=None, normed=None, weights=None, density=None): 1.0 """ - return call_origin(numpy.histogram, a=a, bins=bins, range=range, normed=normed, weights=weights, density=density) + return call_origin(numpy.histogram, a=a, bins=bins, range=range, density=density, weights=weights) def max(x1, axis=None, out=None, keepdims=False, initial=None, where=True): diff --git a/dpnp/dpnp_iface_types.py b/dpnp/dpnp_iface_types.py index dfcf599bf3be..a39cfa47cd12 100644 --- a/dpnp/dpnp_iface_types.py +++ b/dpnp/dpnp_iface_types.py @@ -2,7 +2,7 @@ # distutils: language = c++ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -36,12 +36,12 @@ import numpy + __all__ = [ "bool", "bool_", "complex128", "complex64", - "default_float_type", "dtype", "float", "float16", @@ -59,7 +59,7 @@ "void" ] -bool = numpy.bool +bool = numpy.bool_ bool_ = numpy.bool_ complex128 = numpy.complex128 complex64 = numpy.complex64 @@ -67,18 +67,14 @@ float16 = numpy.float16 float32 = numpy.float32 float64 = numpy.float64 -float = numpy.float +float = numpy.float_ int32 = numpy.int32 int64 = numpy.int64 integer = numpy.integer -int = numpy.int +int = numpy.int_ longcomplex = numpy.longcomplex -def default_float_type(): - return float64 - - def isscalar(obj): """ Returns True if the type of `obj` is a scalar type. 
diff --git a/dpnp/dpnp_utils/dpnp_algo_utils.pxd b/dpnp/dpnp_utils/dpnp_algo_utils.pxd index 0924dae26408..db7127319bb0 100644 --- a/dpnp/dpnp_utils/dpnp_algo_utils.pxd +++ b/dpnp/dpnp_utils/dpnp_algo_utils.pxd @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -123,7 +123,7 @@ cdef class dpnp_descriptor: cdef void * get_data(self) -cdef shape_type_c get_common_shape(shape_type_c input1_shape, shape_type_c input2_shape) +cdef shape_type_c get_common_shape(shape_type_c input1_shape, shape_type_c input2_shape) except * """ Calculate common shape from input shapes """ diff --git a/dpnp/dpnp_utils/dpnp_algo_utils.pyx b/dpnp/dpnp_utils/dpnp_algo_utils.pyx index c09bef8ec485..672aa19e4dcb 100644 --- a/dpnp/dpnp_utils/dpnp_algo_utils.pyx +++ b/dpnp/dpnp_utils/dpnp_algo_utils.pyx @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2022, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -34,7 +34,7 @@ This module contains differnt helpers and utilities import numpy import dpctl -import dpctl.tensor as dpt +import dpctl.utils as dpu import dpnp.config as config import dpnp.dpnp_container as dpnp_container @@ -70,7 +70,7 @@ __all__ = [ "dpnp_descriptor", "get_axis_indeces", "get_axis_offsets", - "get_common_allocation_queue", + "get_usm_allocations", "_get_linear_index", "map_dtype_to_device", "normalize_axis", @@ -163,9 +163,9 @@ def call_origin(function, *args, **kwargs): kwargx = convert_item(kwarg) kwargs_new[key] = kwargx - exec_q = dpctl.utils.get_execution_queue(alloc_queues) + exec_q = dpu.get_execution_queue(alloc_queues) if exec_q is None: - exec_q = sycl_queue + exec_q = dpnp.get_normalized_queue_device(sycl_queue=sycl_queue) # print(f"DPNP call_origin(): bakend called. \n\t function={function}, \n\t args_new={args_new}, \n\t kwargs_new={kwargs_new}, \n\t dpnp_inplace={dpnp_inplace}") # TODO need to put array memory into NumPy call result_origin = function(*args_new, **kwargs_new) @@ -220,30 +220,49 @@ def unwrap_array(x1): return x1 -def get_common_allocation_queue(objects): - """ - Given a list of objects returns the queue which can be used for a memory allocation - to follow compute follows data paradigm, or returns `None` if the default queue can be used. - An exception will be raised, if the paradigm is broked for the given list of objects. 
- """ - if not isinstance(objects, (list, tuple)): - raise TypeError("Expected a list or a tuple, got {}".format(type(objects))) - - if len(objects) == 0: +def _get_coerced_usm_type(objects): + types_in_use = [obj.usm_type for obj in objects if hasattr(obj, "usm_type")] + if len(types_in_use) == 0: return None + elif len(types_in_use) == 1: + return types_in_use[0] + + common_usm_type = dpu.get_coerced_usm_type(types_in_use) + if common_usm_type is None: + raise ValueError("Input arrays must have coerced USM types") + return common_usm_type + +def _get_common_allocation_queue(objects): queues_in_use = [obj.sycl_queue for obj in objects if hasattr(obj, "sycl_queue")] if len(queues_in_use) == 0: return None elif len(queues_in_use) == 1: return queues_in_use[0] - common_queue = dpt.get_execution_queue(queues_in_use) + common_queue = dpu.get_execution_queue(queues_in_use) if common_queue is None: raise ValueError("Input arrays must be allocated on the same SYCL queue") return common_queue +def get_usm_allocations(objects): + """ + Given a list of objects returns a tuple of USM type and SYCL queue + which can be used for a memory allocation and to follow compute follows data paradigm, + or returns `(None, None)` if the default USM type and SYCL queue can be used. + An exception will be raised if the paradigm is broken for the given list of objects. + + """ + + if not isinstance(objects, (list, tuple)): + raise TypeError("Expected a list or a tuple, got {}".format(type(objects))) + + if len(objects) == 0: + return (None, None) + return (_get_coerced_usm_type(objects), _get_common_allocation_queue(objects)) + + def map_dtype_to_device(dtype, device): """ Map an input ``dtype`` with type ``device`` may use @@ -399,7 +418,7 @@ cdef tuple get_shape_dtype(object input_obj): # shape and dtype does not match with siblings. 
if ((return_shape != elem_shape) or (return_dtype != elem_dtype)): - return (elem_shape, numpy.dtype(numpy.object)) + return (elem_shape, numpy.dtype(numpy.object_)) list_shape.push_back(len(input_obj)) list_shape.insert(list_shape.end(), return_shape.begin(), return_shape.end()) @@ -429,7 +448,9 @@ cpdef find_common_type(object x1_obj, object x2_obj): return numpy.find_common_type(array_types, scalar_types) -cdef shape_type_c get_common_shape(shape_type_c input1_shape, shape_type_c input2_shape): +cdef shape_type_c get_common_shape(shape_type_c input1_shape, shape_type_c input2_shape) except *: + cdef shape_type_c input1_shape_orig = input1_shape + cdef shape_type_c input2_shape_orig = input2_shape cdef shape_type_c result_shape # ex (8, 1, 6, 1) and (7, 1, 5) -> (8, 1, 6, 1) and (1, 7, 1, 5) @@ -446,9 +467,9 @@ cdef shape_type_c get_common_shape(shape_type_c input1_shape, shape_type_c input elif input2_shape[it] == 1: result_shape.push_back(input1_shape[it]) else: - err_msg = f"{ERROR_PREFIX} in function get_common_shape()" - err_msg += f"operands could not be broadcast together with shapes {input1_shape} {input2_shape}" - ValueError(err_msg) + err_msg = f"{ERROR_PREFIX} in function get_common_shape(): " + err_msg += f"operands could not be broadcast together with shapes {input1_shape_orig} {input2_shape_orig}" + raise ValueError(err_msg) return result_shape @@ -629,10 +650,7 @@ cdef tuple get_common_usm_allocation(dpnp_descriptor x1, dpnp_descriptor x2): "could not recognize common USM type for inputs of USM types {} and {}" "".format(array1_obj.usm_type, array2_obj.usm_type)) - common_sycl_queue = dpctl.utils.get_execution_queue((array1_obj.sycl_queue, array2_obj.sycl_queue)) - # TODO: refactor, remove when CFD is implemented in all array constructors - if common_sycl_queue is None and array1_obj.sycl_context == array2_obj.sycl_context: - common_sycl_queue = array1_obj.sycl_queue + common_sycl_queue = dpu.get_execution_queue((array1_obj.sycl_queue, 
array2_obj.sycl_queue)) if common_sycl_queue is None: raise ValueError( "could not recognize common SYCL queue for inputs in SYCL queues {} and {}" diff --git a/dpnp/random/dpnp_iface_random.py b/dpnp/random/dpnp_iface_random.py index 677f2a7e94bc..ade85bb2fe18 100644 --- a/dpnp/random/dpnp_iface_random.py +++ b/dpnp/random/dpnp_iface_random.py @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2022, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -98,11 +98,20 @@ ] -def _get_random_state(): - global _dpnp_random_state - if _dpnp_random_state is None: - _dpnp_random_state = RandomState() - return _dpnp_random_state +def _get_random_state(device=None, sycl_queue=None): + global _dpnp_random_states + + if not isinstance(_dpnp_random_states, dict): + _dpnp_random_states = dict() + sycl_queue = dpnp.get_normalized_queue_device(device=device, sycl_queue=sycl_queue) + if sycl_queue not in _dpnp_random_states: + rs = RandomState(device=device, sycl_queue=sycl_queue) + if sycl_queue == rs.get_sycl_queue(): + _dpnp_random_states[sycl_queue] = rs + else: + raise RuntimeError("Normalized SYCL queue {} mismatched with one returned by RandmoState {}" + .format(sycl_queue, rs.get_sycl_queue())) + return _dpnp_random_states[sycl_queue] def beta(a, b, size=None): @@ -774,20 +783,42 @@ def negative_binomial(n, p, size=None): return call_origin(numpy.random.negative_binomial, n, p, size) -def normal(loc=0.0, scale=1.0, size=None, usm_type='device'): +def normal(loc=0.0, + scale=1.0, + size=None, + device=None, + usm_type="device", + sycl_queue=None): """ - Normal distribution. - Draw random samples from a normal (Gaussian) distribution. For full documentation refer to :obj:`numpy.random.normal`. 
+ Parameters + ---------- + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {"device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + + Returns + ------- + out : dpnp.ndarray + Drawn samples from the parameterized normal distribution. + Output array data type is the same as input `dtype`. If `dtype` is ``None`` (the default), + :obj:`dpnp.float64` type will be used if device supports it, or :obj:`dpnp.float32` otherwise. + Limitations ----------- - Parameters ``loc`` and ``scale`` are supported as scalar. + Parameters `loc` and `scale` are supported as scalar. Otherwise, :obj:`numpy.random.normal(loc, scale, size)` samples are drawn. - Output array data type is :obj:`dpnp.float64` if device supports it - or :obj:`dpnp.float32` otherwise. + Parameter `dtype` is supported only as :obj:`dpnp.float32`, :obj:`dpnp.float64` or ``None``. 
Examples -------- @@ -796,11 +827,9 @@ def normal(loc=0.0, scale=1.0, size=None, usm_type='device'): >>> s = dpnp.random.normal(mu, sigma, 1000) """ - return _get_random_state().normal(loc=loc, - scale=scale, - size=size, - dtype=None, - usm_type=usm_type) + + rs = _get_random_state(device=device, sycl_queue=sycl_queue) + return rs.normal(loc=loc, scale=scale, size=size, dtype=None, usm_type=usm_type) def noncentral_chisquare(df, nonc, size=None): @@ -986,7 +1015,11 @@ def power(a, size=None): return call_origin(numpy.random.power, a, size) -def rand(d0, *dn, usm_type="device"): +def rand(d0, + *dn, + device=None, + usm_type="device", + sycl_queue=None): """ Random values in a given shape. @@ -995,10 +1028,24 @@ def rand(d0, *dn, usm_type="device"): For full documentation refer to :obj:`numpy.random.rand`. - Limitations - ----------- - Output array data type is :obj:`dpnp.float64` if device supports it - or :obj:`dpnp.float32` otherwise. + Parameters + ---------- + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {"device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + + Returns + ------- + out : dpnp.ndarray + Random values in a given shape. + Output array data type is :obj:`dpnp.float64` if device supports it, or :obj:`dpnp.float32` otherwise. 
Examples -------- @@ -1012,20 +1059,48 @@ def rand(d0, *dn, usm_type="device"): """ - return _get_random_state().rand(d0, *dn, usm_type=usm_type) + rs = _get_random_state(device=device, sycl_queue=sycl_queue) + return rs.rand(d0, *dn, usm_type=usm_type) -def randint(low, high=None, size=None, dtype=int, usm_type="device"): +def randint(low, + high=None, + size=None, + dtype=int, + device=None, + usm_type="device", + sycl_queue=None): """ Return random integers from `low` (inclusive) to `high` (exclusive). For full documentation refer to :obj:`numpy.random.randint`. + Parameters + ---------- + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {"device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + + Returns + ------- + out : dpnp.ndarray + `size`-shaped array of random integers from the appropriate distribution, + or a single such random int if `size` is not provided. + Output array data type is the same as input `dtype`. + Limitations ----------- - Parameters ``low`` and ``high`` are supported only as scalar. - Parameter ``dtype`` is supported only as `int`. - Otherwise, :obj:`numpy.random.randint(low, high, size, dtype)` samples are drawn. + Parameters `low` and `high` are supported only as a scalar. + Parameter `dtype` is supported only as :obj:`dpnp.int32` or ``int``, + but ``int`` value is considered to be exactly equivalent to :obj:`dpnp.int32`. 
+ Otherwise, :obj:`numpy.random.RandomState.randint(low, high, size, dtype)` samples are drawn. Examples -------- @@ -1041,23 +1116,39 @@ def randint(low, high=None, size=None, dtype=int, usm_type="device"): """ - return _get_random_state().randint(low=low, - high=high, - size=size, - dtype=dtype, - usm_type=usm_type) + rs = _get_random_state(device=device, sycl_queue=sycl_queue) + return rs.randint(low=low, high=high, size=size, dtype=dtype, usm_type=usm_type) -def randn(d0, *dn, usm_type="device"): +def randn(d0, + *dn, + device=None, + usm_type="device", + sycl_queue=None): """ Return a sample (or samples) from the "standard normal" distribution. For full documentation refer to :obj:`numpy.random.randn`. - Limitations - ----------- - Output array data type is :obj:`dpnp.float64` if device supports it - or :obj:`dpnp.float32` otherwise. + Parameters + ---------- + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {"device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + + Returns + ------- + out : dpnp.ndarray + A ``(d0, d1, ..., dn)``-shaped array of floating-point samples from + the standard normal distribution, or a single such float if no parameters were supplied. + Output array data type is :obj:`dpnp.float64` if device supports it, or :obj:`dpnp.float32` otherwise. 
Examples -------- @@ -1075,20 +1166,38 @@ def randn(d0, *dn, usm_type="device"): """ - return _get_random_state().randn(d0, *dn, usm_type=usm_type) + rs = _get_random_state(device=device, sycl_queue=sycl_queue) + return rs.randn(d0, *dn, usm_type=usm_type) -def random(size=None, usm_type="device"): +def random(size=None, + device=None, + usm_type="device", + sycl_queue=None): """ Return random floats in the half-open interval [0.0, 1.0). Alias for random_sample. For full documentation refer to :obj:`numpy.random.random`. - Limitations - ----------- - Output array data type is :obj:`dpnp.float64` if device supports it - or :obj:`dpnp.float32` otherwise. + Parameters + ---------- + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {"device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + + Returns + ------- + out : dpnp.ndarray + Array of random floats of shape `size` (if ``size=None``, zero dimension array with a single float is returned). + Output array data type is :obj:`dpnp.float64` if device supports it, or :obj:`dpnp.float32` otherwise. 
Examples -------- @@ -1102,20 +1211,43 @@ def random(size=None, usm_type="device"): """ - return random_sample(size=size, usm_type=usm_type) + return random_sample(size=size, device=device, usm_type=usm_type, sycl_queue=sycl_queue) -def random_integers(low, high=None, size=None, usm_type="device"): +def random_integers(low, + high=None, + size=None, + device=None, + usm_type="device", + sycl_queue=None): """ Random integers between `low` and `high`, inclusive. For full documentation refer to :obj:`numpy.random.random_integers`. + Parameters + ---------- + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {"device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + + Returns + ------- + out : dpnp.ndarray + `size`-shaped array of random integers from the appropriate distribution, + or a single such random int if `size` is not provided. + Limitations ----------- - Parameters ``low`` and ``high`` are supported as scalar. - Otherwise, :obj:`numpy.random.random_integers(low, high, size)` samples - are drawn. + Parameters `low` and `high` are supported as scalar. + Otherwise, :obj:`numpy.random.random_integers(low, high, size)` samples are drawn. 
See Also -------- @@ -1134,12 +1266,15 @@ def random_integers(low, high=None, size=None, usm_type="device"): elif not dpnp.isscalar(high): pass else: - return randint(low, int(high) + 1, size=size, usm_type=usm_type) + return randint(low, int(high) + 1, size=size, device=device, usm_type=usm_type, sycl_queue=sycl_queue) return call_origin(numpy.random.random_integers, low, high, size) -def random_sample(size=None, usm_type="device"): +def random_sample(size=None, + device=None, + usm_type="device", + sycl_queue=None): """ Return random floats in the half-open interval [0.0, 1.0). @@ -1147,10 +1282,24 @@ def random_sample(size=None, usm_type="device"): For full documentation refer to :obj:`numpy.random.random_sample`. - Limitations - ----------- - Output array data type is :obj:`dpnp.float64` if device supports it - or :obj:`dpnp.float32` otherwise. + Parameters + ---------- + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {"device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + + Returns + ------- + out : dpnp.ndarray + Array of random floats of shape `size` (if ``size=None``, zero dimension array with a single float is returned). + Output array data type is :obj:`dpnp.float64` if device supports it, or :obj:`dpnp.float32` otherwise. 
Examples -------- @@ -1164,21 +1313,38 @@ def random_sample(size=None, usm_type="device"): """ - return _get_random_state().random_sample(size=size, - usm_type=usm_type) + rs = _get_random_state(device=device, sycl_queue=sycl_queue) + return rs.random_sample(size=size, usm_type=usm_type) -def ranf(size=None, usm_type="device"): +def ranf(size=None, + device=None, + usm_type="device", + sycl_queue=None): """ Return random floats in the half-open interval [0.0, 1.0). This is an alias of random_sample. For full documentation refer to :obj:`numpy.random.ranf`. - Limitations - ----------- - Output array data type is :obj:`dpnp.float64` if device supports it - or :obj:`dpnp.float32` otherwise. + Parameters + ---------- + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {"device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + + Returns + ------- + out : dpnp.ndarray + Array of random floats of shape `size` (if ``size=None``, zero dimension array with a single float is returned). + Output array data type is :obj:`dpnp.float64` if device supports it, or :obj:`dpnp.float32` otherwise. 
Examples -------- @@ -1193,7 +1359,7 @@ def ranf(size=None, usm_type="device"): """ - return random_sample(size=size, usm_type=usm_type) + return random_sample(size=size, device=device, usm_type=usm_type, sycl_queue=sycl_queue) def rayleigh(scale=1.0, size=None): @@ -1230,17 +1396,34 @@ def rayleigh(scale=1.0, size=None): return call_origin(numpy.random.rayleigh, scale, size) -def sample(size=None, usm_type="device"): +def sample(size=None, + device=None, + usm_type="device", + sycl_queue=None): """ Return random floats in the half-open interval [0.0, 1.0). This is an alias of random_sample. For full documentation refer to :obj:`numpy.random.sample`. - Limitations - ----------- - Output array data type is :obj:`dpnp.float64` if device supports it - or :obj:`dpnp.float32` otherwise. + Parameters + ---------- + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {"device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + + Returns + ------- + out : dpnp.ndarray + Array of random floats of shape `size` (if ``size=None``, zero dimension array with a single float is returned). + Output array data type is :obj:`dpnp.float64` if device supports it, or :obj:`dpnp.float32` otherwise. 
Examples -------- @@ -1255,7 +1438,7 @@ def sample(size=None, usm_type="device"): """ - return random_sample(size=size, usm_type=usm_type) + return random_sample(size=size, device=device, usm_type=usm_type, sycl_queue=sycl_queue) def shuffle(x1): @@ -1283,18 +1466,35 @@ def shuffle(x1): return -def seed(seed=None): +def seed(seed=None, + device=None, + sycl_queue=None): """ - Reseed a legacy mt19937 random number generator engine. + Reseed a legacy MT19937 random number generator engine. + + Parameters + ---------- + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where an array with generated numbers will be created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for an array with generated numbers. Limitations ----------- - Parameter ``seed`` is supported as a scalar. - Otherwise, the function will use :obj:`numpy.random.seed` on the backend - and will be executed on fallback backend. + Parameter `seed` is supported as either a scalar or an array of at most three integer scalars. 
""" + # update a mt19937 random number for both RandomState and legacy functionality + global _dpnp_random_states + + sycl_queue = dpnp.get_normalized_queue_device(device=device, sycl_queue=sycl_queue) + _dpnp_random_states[sycl_queue] = RandomState(seed=seed, sycl_queue=sycl_queue) + if not use_origin_backend(seed): # TODO: # array_like of ints for `seed` @@ -1307,10 +1507,6 @@ def seed(seed=None): else: # TODO: # migrate to a single approach with RandomState class - - # update a mt19937 random number for both RandomState and legacy functionality - global _dpnp_random_state - _dpnp_random_state = RandomState(seed) dpnp_rng_srand(seed) # always reseed numpy engine also @@ -1405,17 +1601,34 @@ def standard_gamma(shape, size=None): return call_origin(numpy.random.standard_gamma, shape, size) -def standard_normal(size=None, usm_type="device"): - """Standard normal distribution. - +def standard_normal(size=None, + device=None, + usm_type="device", + sycl_queue=None): + """ Draw samples from a standard Normal distribution (mean=0, stdev=1). For full documentation refer to :obj:`numpy.random.standard_normal`. - Limitations - ----------- - Output array data type is :obj:`dpnp.float64` if device supports it - or :obj:`dpnp.float32` otherwise. + Parameters + ---------- + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {"device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. 
+ + Returns + ------- + out : dpnp.ndarray + A floating-point array of shape `size` of drawn samples, or a + single sample if `size` was not specified. + Output array data type is :obj:`dpnp.float64` if device supports it, or :obj:`dpnp.float32` otherwise. Examples -------- @@ -1423,7 +1636,9 @@ def standard_normal(size=None, usm_type="device"): >>> s = dpnp.random.standard_normal(1000) """ - return _get_random_state().standard_normal(size=size, usm_type=usm_type) + + rs = _get_random_state(device=device, sycl_queue=sycl_queue) + return rs.standard_normal(size=size, usm_type=usm_type) def standard_t(df, size=None): @@ -1506,18 +1721,45 @@ def triangular(left, mode, right, size=None): return call_origin(numpy.random.triangular, left, mode, right, size) -def uniform(low=0.0, high=1.0, size=None, usm_type='device'): +def uniform(low=0.0, + high=1.0, + size=None, + device=None, + usm_type="device", + sycl_queue=None): """ Draw samples from a uniform distribution. + Samples are uniformly distributed over the half-open interval [low, high) (includes low, but excludes high). + In other words, any value within the given interval is equally likely to be drawn by uniform. + For full documentation refer to :obj:`numpy.random.uniform`. + Parameters + ---------- + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {"device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. 
+ + Returns + ------- + out : dpnp.ndarray + Drawn samples from the parameterized uniform distribution. + Output array data type is the same as input `dtype`. If `dtype` is ``None`` (the default), + :obj:`dpnp.float64` type will be used if device supports it, or :obj:`dpnp.float32` otherwise. + Limitations ----------- - Parameters ``low`` and ``high`` are supported as scalar. - Otherwise, :obj:`numpy.random.uniform(low, high, size)` samples are drawn. - Output array data type is :obj:`dpnp.float64` if device supports it - or :obj:`dpnp.float32` otherwise. + Parameters `low` and `high` are supported as a scalar. Otherwise, + :obj:`numpy.random.uniform(low, high, size)` samples are drawn. + Parameter `dtype` is supported only as :obj:`dpnp.int32`, :obj:`dpnp.float32`, :obj:`dpnp.float64` or ``None``. Examples -------- @@ -1530,11 +1772,9 @@ def uniform(low=0.0, high=1.0, size=None, usm_type='device'): :obj:`dpnp.random.random` : Floats uniformly distributed over ``[0, 1)``. """ - return _get_random_state().uniform(low=low, - high=high, - size=size, - dtype=None, - usm_type=usm_type) + + rs = _get_random_state(device=device, sycl_queue=sycl_queue) + return rs.uniform(low=low, high=high, size=size, dtype=None, usm_type=usm_type) def vonmises(mu, kappa, size=None): @@ -1679,4 +1919,4 @@ def zipf(a, size=None): return call_origin(numpy.random.zipf, a, size) -_dpnp_random_state = None +_dpnp_random_states = {} diff --git a/dpnp/random/dpnp_random_state.py b/dpnp/random/dpnp_random_state.py index 1d4648c31c47..c224553b0cff 100644 --- a/dpnp/random/dpnp_random_state.py +++ b/dpnp/random/dpnp_random_state.py @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2022, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -76,7 +76,12 @@ class RandomState: """ def __init__(self, seed=None, device=None, sycl_queue=None): - self._seed = 1 if seed is None else seed + if seed is None: + # ask NumPy to generate an array of three random integers as default seed value + self._seed = numpy.random.randint(low=0, high=numpy.iinfo(numpy.int32).max + 1, size=3) + else: + self._seed = seed + self._sycl_queue = dpnp.get_normalized_queue_device(device=device, sycl_queue=sycl_queue) self._sycl_device = self._sycl_queue.sycl_device @@ -290,7 +295,7 @@ def rand(self, *args, usm_type="device"): def randint(self, low, high=None, size=None, dtype=int, usm_type="device"): """ - Draw random integers from low (inclusive) to high (exclusive). + Draw random integers from `low` (inclusive) to `high` (exclusive). Return random integers from the “discrete uniform” distribution of the specified type in the “half-open” interval [low, high). @@ -332,7 +337,7 @@ def randint(self, low, high=None, size=None, dtype=int, usm_type="device"): if not use_origin_backend(low): if not dpnp.isscalar(low): pass - elif not dpnp.isscalar(high): + elif not (high is None or dpnp.isscalar(high)): pass else: _dtype = dpnp.int32 if dtype is int else dpnp.dtype(dtype) diff --git a/dpnp/version.py b/dpnp/version.py index 160e8ec963a8..f09ea3c76a75 100644 --- a/dpnp/version.py +++ b/dpnp/version.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2022, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -29,6 +29,6 @@ DPNP version module """ -__version__: str = '0.11.0' +__version__: str = '0.11.1' version: str = __version__ diff --git a/examples/example4.py b/examples/example4.py index 0790f84d10aa..6705149d52ba 100755 --- a/examples/example4.py +++ b/examples/example4.py @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -40,7 +40,7 @@ """ for function in [numpy.sqrt, numpy.fabs, numpy.reciprocal, numpy.square, numpy.cbrt, numpy.degrees, numpy.radians]: print() - for test_type in [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool]: + for test_type in [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool_]: data = numpy.array([1, 2, 3, 4], dtype=test_type) result = function(data) print(f"input:{data.dtype.name:10}: outout:{result.dtype.name:10}: name:{function.__name__}") @@ -50,8 +50,8 @@ """ for function in [numpy.equal, numpy.arctan2]: print() - for input1_type in [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool]: - for input2_type in [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool]: + for input1_type in [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool_]: + for input2_type in [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool_]: data1 = numpy.array([1, 2, 3, 4], dtype=input1_type) data2 = numpy.array([11, 21, 31, 41], dtype=input2_type) result = function(data1, data2) diff --git a/tests/conftest.py b/tests/conftest.py index 78d3180bac08..22276f125f26 100755 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # 
***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -77,3 +77,22 @@ def pytest_collection_modifyitems(config, items): @pytest.fixture def allow_fall_back_on_numpy(monkeypatch): monkeypatch.setattr(dpnp.config, '__DPNP_RAISE_EXCEPION_ON_NUMPY_FALLBACK__', 0) + +@pytest.fixture +def suppress_divide_numpy_warnings(): + # divide: treatment for division by zero (infinite result obtained from finite numbers) + old_settings = numpy.seterr(divide='ignore') + yield + numpy.seterr(**old_settings) # reset to default + +@pytest.fixture +def suppress_invalid_numpy_warnings(): + # invalid: treatment for invalid floating-point operation + # (result is not an expressible number, typically indicates that a NaN was produced) + old_settings = numpy.seterr(invalid='ignore') + yield + numpy.seterr(**old_settings) # reset to default + +@pytest.fixture +def suppress_divide_invalid_numpy_warnings(suppress_divide_numpy_warnings, suppress_invalid_numpy_warnings): + yield diff --git a/tests/helper.py b/tests/helper.py new file mode 100644 index 000000000000..17c62cecd289 --- /dev/null +++ b/tests/helper.py @@ -0,0 +1,39 @@ +import dpctl +import dpnp + + +def get_all_dtypes(no_bool=False, + no_float16=True, + no_complex=False, + no_none=False, + device=None): + """ + Build a list of types supported by DPNP based on input flags and device capabilities. 
+ """ + + dev = dpctl.select_default_device() if device is None else device + + # add boolean type + dtypes = [dpnp.bool] if not no_bool else [] + + # add integer types + dtypes.extend([dpnp.int32, dpnp.int64]) + + # add floating types + if not no_float16 and dev.has_aspect_fp16: + dtypes.append(dpnp.float16) + + dtypes.append(dpnp.float32) + if dev.has_aspect_fp64: + dtypes.append(dpnp.float64) + + # add complex types + if not no_complex: + dtypes.append(dpnp.complex64) + if dev.has_aspect_fp64: + dtypes.append(dpnp.complex128) + + # add None value to validate a default dtype + if not no_none: + dtypes.append(None) + return dtypes diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl index b8b02e95bbfb..2f0334077a06 100644 --- a/tests/skipped_tests.tbl +++ b/tests/skipped_tests.tbl @@ -35,54 +35,42 @@ tests/third_party/intel/test_zero_copy_test1.py::test_dpnp_interaction_with_dpct tests/test_arraymanipulation.py::TestHstack::test_generator tests/test_arraymanipulation.py::TestVstack::test_generator + tests/test_dparray.py::test_astype[[]-float64-float64] tests/test_dparray.py::test_astype[[]-float64-float32] tests/test_dparray.py::test_astype[[]-float64-int64] tests/test_dparray.py::test_astype[[]-float64-int32] tests/test_dparray.py::test_astype[[]-float64-bool] -tests/test_dparray.py::test_astype[[]-float64-bool_] tests/test_dparray.py::test_astype[[]-float64-complex] tests/test_dparray.py::test_astype[[]-float32-float64] tests/test_dparray.py::test_astype[[]-float32-float32] tests/test_dparray.py::test_astype[[]-float32-int64] tests/test_dparray.py::test_astype[[]-float32-int32] tests/test_dparray.py::test_astype[[]-float32-bool] -tests/test_dparray.py::test_astype[[]-float32-bool_] tests/test_dparray.py::test_astype[[]-float32-complex] tests/test_dparray.py::test_astype[[]-int64-float64] tests/test_dparray.py::test_astype[[]-int64-float32] tests/test_dparray.py::test_astype[[]-int64-int64] tests/test_dparray.py::test_astype[[]-int64-int32] 
tests/test_dparray.py::test_astype[[]-int64-bool] -tests/test_dparray.py::test_astype[[]-int64-bool_] tests/test_dparray.py::test_astype[[]-int64-complex] tests/test_dparray.py::test_astype[[]-int32-float64] tests/test_dparray.py::test_astype[[]-int32-float32] tests/test_dparray.py::test_astype[[]-int32-int64] tests/test_dparray.py::test_astype[[]-int32-int32] tests/test_dparray.py::test_astype[[]-int32-bool] -tests/test_dparray.py::test_astype[[]-int32-bool_] tests/test_dparray.py::test_astype[[]-int32-complex] tests/test_dparray.py::test_astype[[]-bool-float64] tests/test_dparray.py::test_astype[[]-bool-float32] tests/test_dparray.py::test_astype[[]-bool-int64] tests/test_dparray.py::test_astype[[]-bool-int32] tests/test_dparray.py::test_astype[[]-bool-bool] -tests/test_dparray.py::test_astype[[]-bool-bool_] tests/test_dparray.py::test_astype[[]-bool-complex] -tests/test_dparray.py::test_astype[[]-bool_-float64] -tests/test_dparray.py::test_astype[[]-bool_-float32] -tests/test_dparray.py::test_astype[[]-bool_-int64] -tests/test_dparray.py::test_astype[[]-bool_-int32] -tests/test_dparray.py::test_astype[[]-bool_-bool] -tests/test_dparray.py::test_astype[[]-bool_-bool_] -tests/test_dparray.py::test_astype[[]-bool_-complex] tests/test_dparray.py::test_astype[[]-complex-float64] tests/test_dparray.py::test_astype[[]-complex-float32] tests/test_dparray.py::test_astype[[]-complex-int64] tests/test_dparray.py::test_astype[[]-complex-int32] tests/test_dparray.py::test_astype[[]-complex-bool] -tests/test_dparray.py::test_astype[[]-complex-bool_] tests/test_dparray.py::test_astype[[]-complex-complex] tests/test_linalg.py::test_cond[None-[[1, 0, -1], [0, 1, 0], [1, 0, 1]]] @@ -342,7 +330,6 @@ tests/third_party/cupy/creation_tests/test_basic.py::TestBasic::test_empty_like_ tests/third_party/cupy/creation_tests/test_basic.py::TestBasic::test_empty_like_K_strides tests/third_party/cupy/creation_tests/test_basic.py::TestBasic::test_empty_like_subok 
tests/third_party/cupy/creation_tests/test_basic.py::TestBasic::test_empty_zero_sized_array_strides -tests/third_party/cupy/creation_tests/test_basic.py::TestBasic::test_eye tests/third_party/cupy/creation_tests/test_basic.py::TestBasic::test_full_like_subok tests/third_party/cupy/creation_tests/test_basic.py::TestBasic::test_ones_like_subok tests/third_party/cupy/creation_tests/test_basic.py::TestBasic::test_zeros_like_subok @@ -402,7 +389,7 @@ tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_asar tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_ascontiguousarray_on_noncontiguous_array tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_asfortranarray_cuda_array_zero_dim tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_asfortranarray_cuda_array_zero_dim_dtype -tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_fromfile + tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, indexing='xy', sparse=False}::test_meshgrid0 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, indexing='xy', sparse=False}::test_meshgrid1 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, indexing='xy', sparse=False}::test_meshgrid2 @@ -778,18 +765,13 @@ tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_para tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_547_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='remainder', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_549_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='mod', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticModf::test_modf 
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_0_{name='reciprocal', nargs=1}::test_raises_with_numpy_input + tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_10_{name='remainder', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_11_{name='mod', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_1_{name='angle', nargs=1}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_2_{name='add', nargs=2}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_4_{name='divide', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_5_{name='power', nargs=2}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_6_{name='subtract', nargs=2}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_7_{name='true_divide', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_8_{name='floor_divide', nargs=2}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_9_{name='fmod', nargs=2}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestBoolSubtract_param_3_{shape=(), xp=dpnp}::test_bool_subtract + tests/third_party/cupy/math_tests/test_explog.py::TestExplog::test_logaddexp tests/third_party/cupy/math_tests/test_explog.py::TestExplog::test_logaddexp2 
tests/third_party/cupy/math_tests/test_floating.py::TestFloating::test_copysign_float diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index 01a2bb21dc92..e6598904e16f 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -18,7 +18,6 @@ tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-conjugate-data2] tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-copy-data3] tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-cumprod-data4] tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-cumsum-data5] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-diff-data6] tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-ediff1d-data7] tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-fabs-data8] tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-floor-data9] @@ -29,11 +28,9 @@ tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-conjugate-data2] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-copy-data3] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-cumprod-data4] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-cumsum-data5] -tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-diff-data6] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-ediff1d-data7] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-fabs-data8] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-floor-data9] -tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-gradient-data10] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-nancumprod-data11] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-nancumsum-data12] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-nanprod-data13] @@ -91,6 +88,7 @@ tests/third_party/cupy/indexing_tests/test_insert.py::TestDiagIndicesInvalidValu tests/third_party/cupy/indexing_tests/test_insert.py::TestDiagIndicesFrom_param_0_{shape=(3, 3)}::test_diag_indices_from 
tests/third_party/cupy/indexing_tests/test_insert.py::TestDiagIndicesFrom_param_1_{shape=(0, 0)}::test_diag_indices_from tests/third_party/cupy/indexing_tests/test_insert.py::TestDiagIndicesFrom_param_2_{shape=(2, 2, 2)}::test_diag_indices_from + tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_295_{arg1=array([[1., 2., 3.], [4., 5., 6.]], dtype=float32), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_303_{arg1=array([[1., 2., 3.], [4., 5., 6.]], dtype=float32), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int64), dtype=float64, name='floor_divide', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_375_{arg1=array([[1., 2., 3.], [4., 5., 6.]]), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary @@ -103,6 +101,7 @@ tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_para tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_527_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int64), arg2=array([[0., 1., 2.], [3., 4., 5.]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_535_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int64), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_543_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int64), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int64), dtype=float64, name='floor_divide', use_dtype=False}::test_binary + tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_external_prod_all tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_external_prod_axis 
tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_external_sum_all @@ -301,54 +300,42 @@ tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{extern tests/third_party/intel/test_zero_copy_test1.py::test_dpnp_interaction_with_dpctl_memory tests/test_arraymanipulation.py::TestHstack::test_generator tests/test_arraymanipulation.py::TestVstack::test_generator + tests/test_dparray.py::test_astype[[]-float64-float64] tests/test_dparray.py::test_astype[[]-float64-float32] tests/test_dparray.py::test_astype[[]-float64-int64] tests/test_dparray.py::test_astype[[]-float64-int32] tests/test_dparray.py::test_astype[[]-float64-bool] -tests/test_dparray.py::test_astype[[]-float64-bool_] tests/test_dparray.py::test_astype[[]-float64-complex] tests/test_dparray.py::test_astype[[]-float32-float64] tests/test_dparray.py::test_astype[[]-float32-float32] tests/test_dparray.py::test_astype[[]-float32-int64] tests/test_dparray.py::test_astype[[]-float32-int32] tests/test_dparray.py::test_astype[[]-float32-bool] -tests/test_dparray.py::test_astype[[]-float32-bool_] tests/test_dparray.py::test_astype[[]-float32-complex] tests/test_dparray.py::test_astype[[]-int64-float64] tests/test_dparray.py::test_astype[[]-int64-float32] tests/test_dparray.py::test_astype[[]-int64-int64] tests/test_dparray.py::test_astype[[]-int64-int32] tests/test_dparray.py::test_astype[[]-int64-bool] -tests/test_dparray.py::test_astype[[]-int64-bool_] tests/test_dparray.py::test_astype[[]-int64-complex] tests/test_dparray.py::test_astype[[]-int32-float64] tests/test_dparray.py::test_astype[[]-int32-float32] tests/test_dparray.py::test_astype[[]-int32-int64] tests/test_dparray.py::test_astype[[]-int32-int32] tests/test_dparray.py::test_astype[[]-int32-bool] -tests/test_dparray.py::test_astype[[]-int32-bool_] tests/test_dparray.py::test_astype[[]-int32-complex] tests/test_dparray.py::test_astype[[]-bool-float64] tests/test_dparray.py::test_astype[[]-bool-float32] 
tests/test_dparray.py::test_astype[[]-bool-int64] tests/test_dparray.py::test_astype[[]-bool-int32] tests/test_dparray.py::test_astype[[]-bool-bool] -tests/test_dparray.py::test_astype[[]-bool-bool_] tests/test_dparray.py::test_astype[[]-bool-complex] -tests/test_dparray.py::test_astype[[]-bool_-float64] -tests/test_dparray.py::test_astype[[]-bool_-float32] -tests/test_dparray.py::test_astype[[]-bool_-int64] -tests/test_dparray.py::test_astype[[]-bool_-int32] -tests/test_dparray.py::test_astype[[]-bool_-bool] -tests/test_dparray.py::test_astype[[]-bool_-bool_] -tests/test_dparray.py::test_astype[[]-bool_-complex] tests/test_dparray.py::test_astype[[]-complex-float64] tests/test_dparray.py::test_astype[[]-complex-float32] tests/test_dparray.py::test_astype[[]-complex-int64] tests/test_dparray.py::test_astype[[]-complex-int32] tests/test_dparray.py::test_astype[[]-complex-bool] -tests/test_dparray.py::test_astype[[]-complex-bool_] tests/test_dparray.py::test_astype[[]-complex-complex] tests/test_linalg.py::test_cond[-1-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] @@ -577,7 +564,6 @@ tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_asar tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_ascontiguousarray_on_noncontiguous_array tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_asfortranarray_cuda_array_zero_dim tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_asfortranarray_cuda_array_zero_dim_dtype -tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_fromfile tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, indexing='xy', sparse=False}::test_meshgrid0 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, indexing='xy', sparse=False}::test_meshgrid1 @@ -870,7 +856,7 @@ tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_arra 
tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_array_equal_diff_length tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_array_equal_is_equal tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_array_equal_not_equal -tests/third_party/cupy/logic_tests/test_comparison.py::TestComparisonOperator::test_binary_npscalar_array + tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_0_{shapes=[(), ()]}::test_broadcast tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_0_{shapes=[(), ()]}::test_broadcast_arrays tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_10_{shapes=[(0, 1, 1, 0, 3), (5, 2, 0, 1, 0, 0, 3), (2, 1, 0, 0, 0, 3)]}::test_broadcast @@ -981,6 +967,7 @@ tests/third_party/cupy/manipulation_tests/test_tiling.py::TestTile_param_4_{reps tests/third_party/cupy/manipulation_tests/test_tiling.py::TestTile_param_5_{reps=(2, 3, 4, 5)}::test_array_tile tests/third_party/cupy/manipulation_tests/test_transpose.py::TestTranspose::test_moveaxis_invalid5_2 tests/third_party/cupy/manipulation_tests/test_transpose.py::TestTranspose::test_moveaxis_invalid5_3 + tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_279_{arg1=array([[1., 2., 3.], [4., 5., 6.]], dtype=float32), arg2=array([[0., 1., 2.], [3., 4., 5.]], dtype=float32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_287_{arg1=array([[1., 2., 3.], [4., 5., 6.]], dtype=float32), arg2=array([[0., 1., 2.], [3., 4., 5.]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_295_{arg1=array([[1., 2., 3.], [4., 5., 6.]], dtype=float32), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary @@ 
-997,19 +984,13 @@ tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_para tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_527_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0., 1., 2.], [3., 4., 5.]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_535_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_543_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticModf::test_modf -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_0_{name='reciprocal', nargs=1}::test_raises_with_numpy_input + tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_10_{name='remainder', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_11_{name='mod', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_1_{name='angle', nargs=1}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_2_{name='add', nargs=2}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_4_{name='divide', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_5_{name='power', nargs=2}::test_raises_with_numpy_input 
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_6_{name='subtract', nargs=2}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_7_{name='true_divide', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_8_{name='floor_divide', nargs=2}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_9_{name='fmod', nargs=2}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestBoolSubtract_param_3_{shape=(), xp=dpnp}::test_bool_subtract + tests/third_party/cupy/math_tests/test_explog.py::TestExplog::test_logaddexp tests/third_party/cupy/math_tests/test_explog.py::TestExplog::test_logaddexp2 tests/third_party/cupy/math_tests/test_floating.py::TestFloating::test_copysign_float diff --git a/tests/test_arraycreation.py b/tests/test_arraycreation.py index 5bb9795bbac8..63435bca11f0 100644 --- a/tests/test_arraycreation.py +++ b/tests/test_arraycreation.py @@ -1,4 +1,5 @@ import pytest +from .helper import get_all_dtypes import dpnp @@ -8,24 +9,13 @@ import numpy from numpy.testing import ( assert_allclose, + assert_almost_equal, assert_array_equal, assert_raises ) import tempfile - - -# TODO: discuss with DPCTL why no exception on complex128 -def is_dtype_supported(dtype, no_complex_check=False): - device = dpctl.SyclQueue().sycl_device - - if dtype is dpnp.float16 and not device.has_aspect_fp16: - return False - if dtype is dpnp.float64 and not device.has_aspect_fp64: - return False - if dtype is dpnp.complex128 and not device.has_aspect_fp64 and not no_complex_check: - return False - return True +import operator @pytest.mark.parametrize("start", @@ -37,11 +27,7 @@ def is_dtype_supported(dtype, no_complex_check=False): @pytest.mark.parametrize("step", [None, 1, 2.7, -1.6, 
100], ids=['None', '1', '2.7', '-1.6', '100']) -@pytest.mark.parametrize("dtype", - [numpy.complex128, numpy.complex64, numpy.float64, numpy.float32, - numpy.float16, numpy.int64, numpy.int32], - ids=['complex128', 'complex64', 'float64', 'float32', - 'float16', 'int64', 'int32']) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_float16=False)) def test_arange(start, stop, step, dtype): rtol_mult = 2 if numpy.issubdtype(dtype, numpy.float16): @@ -50,26 +36,23 @@ def test_arange(start, stop, step, dtype): func = lambda xp: xp.arange(start, stop=stop, step=step, dtype=dtype) - if not is_dtype_supported(dtype): - if stop is None: - _stop, _start = start, 0 - else: - _stop, _start = stop, start - _step = 1 if step is None else step - - if _start == _stop: - pass - elif (_step < 0) ^ (_start < _stop): - # exception is raising when dpctl calls a kernel function, - # i.e. when resulting array is not empty - assert_raises(RuntimeError, func, dpnp) - return - exp_array = func(numpy) res_array = func(dpnp).asnumpy() - if numpy.issubdtype(dtype, numpy.floating) or numpy.issubdtype(dtype, numpy.complexfloating): - assert_allclose(exp_array, res_array, rtol=rtol_mult*numpy.finfo(dtype).eps) + if dtype is None: + _device = dpctl.SyclQueue().sycl_device + if not _device.has_aspect_fp64: + # numpy allocated array with dtype=float64 by default, + # while dpnp might use float32, if float64 isn't supported by device + _dtype = dpnp.float32 + rtol_mult *= 150 + else: + _dtype = dpnp.float64 + else: + _dtype = dtype + + if numpy.issubdtype(_dtype, numpy.floating) or numpy.issubdtype(_dtype, numpy.complexfloating): + assert_allclose(exp_array, res_array, rtol=rtol_mult*numpy.finfo(_dtype).eps) else: assert_array_equal(exp_array, res_array) @@ -101,43 +84,33 @@ def test_diag(v, k): @pytest.mark.parametrize("N", - [0, 1, 2, 3, 4], - ids=['0', '1', '2', '3', '4']) + [0, 1, 2, 3], + ids=['0', '1', '2', '3']) @pytest.mark.parametrize("M", - [None, 0, 1, 2, 3, 4], - 
ids=['None', '0', '1', '2', '3', '4']) + [None, 0, 1, 2, 3], + ids=['None', '0', '1', '2', '3']) @pytest.mark.parametrize("k", - [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5], - ids=['-5', '-4', '-3', '-2', '-1', '0', '1', '2', '3', '4', '5']) -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) -def test_eye(N, M, k, dtype): - expected = numpy.eye(N, M=M, k=k, dtype=dtype) - result = dpnp.eye(N, M=M, k=k, dtype=dtype) - assert_array_equal(expected, result) + [-4, -3, -2, -1, 0, 1, 2, 3, 4], + ids=['-4', '-3', '-2', '-1', '0', '1', '2', '3', '4']) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False)) +@pytest.mark.parametrize("order", + [None, "C", "F"], + ids=['None', 'C', 'F']) +def test_eye(N, M, k, dtype, order): + func = lambda xp: xp.eye(N, M, k=k, dtype=dtype, order=order) + assert_array_equal(func(numpy), func(dpnp)) @pytest.mark.usefixtures("allow_fall_back_on_numpy") -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False)) def test_frombuffer(dtype): - buffer = b'12345678' + buffer = b'12345678ABCDEF00' func = lambda xp: xp.frombuffer(buffer, dtype=dtype) - - if not is_dtype_supported(dtype): - # dtpcl intercepts RuntimeError about 'double' type and raise ValueError instead - assert_raises(ValueError, func, dpnp) - return - - assert_array_equal(func(dpnp), func(numpy)) + assert_allclose(func(dpnp), func(numpy)) @pytest.mark.usefixtures("allow_fall_back_on_numpy") -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("dtype", get_all_dtypes()) def test_fromfile(dtype): with tempfile.TemporaryFile() as fh: fh.write(b"\x00\x01\x02\x03\x04\x05\x06\x07\x08") @@ -145,76 +118,44 @@ def 
test_fromfile(dtype): func = lambda xp: xp.fromfile(fh, dtype=dtype) - if not is_dtype_supported(dtype): - fh.seek(0) - # dtpcl intercepts RuntimeError about 'double' type and raise ValueError instead - assert_raises(ValueError, func, dpnp) - return - fh.seek(0) np_res = func(numpy) fh.seek(0) dpnp_res = func(dpnp) - assert_array_equal(dpnp_res, np_res) + assert_almost_equal(dpnp_res, np_res) @pytest.mark.usefixtures("allow_fall_back_on_numpy") -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_float16=False)) def test_fromfunction(dtype): def func(x, y): return x * y shape = (3, 3) call_func = lambda xp: xp.fromfunction(func, shape=shape, dtype=dtype) - - if not is_dtype_supported(dtype): - # dtpcl intercepts RuntimeError about 'double' type and raise ValueError instead - assert_raises(ValueError, call_func, dpnp) - return - assert_array_equal(call_func(dpnp), call_func(numpy)) @pytest.mark.usefixtures("allow_fall_back_on_numpy") -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False)) def test_fromiter(dtype): _iter = [1, 2, 3, 4] func = lambda xp: xp.fromiter(_iter, dtype=dtype) - - if not is_dtype_supported(dtype): - # dtpcl intercepts RuntimeError about 'double' type and raise ValueError instead - assert_raises(ValueError, func, dpnp) - return - assert_array_equal(func(dpnp), func(numpy)) @pytest.mark.usefixtures("allow_fall_back_on_numpy") -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False)) def test_fromstring(dtype): string = "1 2 3 4" func = lambda xp: xp.fromstring(string, dtype=dtype, sep=' 
') - - if not is_dtype_supported(dtype): - # dtpcl intercepts RuntimeError about 'double' type and raise ValueError instead - assert_raises(ValueError, func, dpnp) - return - assert_array_equal(func(dpnp), func(numpy)) @pytest.mark.usefixtures("allow_fall_back_on_numpy") -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("dtype", get_all_dtypes()) @pytest.mark.parametrize("num", [2, 4, 8, 3, 9, 27]) @pytest.mark.parametrize("endpoint", @@ -225,11 +166,6 @@ def test_geomspace(dtype, num, endpoint): func = lambda xp: xp.geomspace(start, stop, num, endpoint, dtype) - if not is_dtype_supported(dtype): - # dtpcl intercepts RuntimeError about 'double' type and raise ValueError instead - assert_raises(ValueError, func, dpnp) - return - np_res = func(numpy) dpnp_res = func(dpnp) @@ -244,25 +180,14 @@ def test_geomspace(dtype, num, endpoint): @pytest.mark.parametrize("n", [0, 1, 4], ids=['0', '1', '4']) -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32, - numpy.bool, numpy.complex64, numpy.complex128, None], - ids=['float64', 'float32', 'int64', 'int32', - 'bool', 'complex64', 'complex128', 'None']) +@pytest.mark.parametrize("dtype", get_all_dtypes()) def test_identity(n, dtype): func = lambda xp: xp.identity(n, dtype=dtype) - - if n > 0 and not is_dtype_supported(dtype): - assert_raises(RuntimeError, func, dpnp) - return - assert_array_equal(func(numpy), func(dpnp)) @pytest.mark.usefixtures("allow_fall_back_on_numpy") -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False)) def test_loadtxt(dtype): func = lambda xp: xp.loadtxt(fh, dtype=dtype) @@ -270,12 +195,6 @@ def test_loadtxt(dtype): fh.write(b"1 2 3 4") fh.flush() - if not is_dtype_supported(dtype): - # 
dtpcl intercepts RuntimeError about 'double' type and raise ValueError instead - fh.seek(0) - assert_raises(ValueError, func, dpnp) - return - fh.seek(0) np_res = func(numpy) fh.seek(0) @@ -284,12 +203,8 @@ def test_loadtxt(dtype): assert_array_equal(dpnp_res, np_res) -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32, None], - ids=['float64', 'float32', 'int64', 'int32', 'None']) -@pytest.mark.parametrize("type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) +@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("offset", [0, 1], ids=['0', '1']) @@ -317,21 +232,9 @@ def test_trace(array, offset, type, dtype): create_array = lambda xp: xp.array(array, type) trace_func = lambda xp, x: xp.trace(x, offset=offset, dtype=dtype) - if not is_dtype_supported(type): - # dtpcl intercepts RuntimeError about 'double' type and raise ValueError instead - assert_raises(ValueError, create_array, dpnp) - return - a = create_array(numpy) ia = create_array(dpnp) - - if not is_dtype_supported(dtype): - assert_raises(RuntimeError, trace_func, dpnp, ia) - return - - expected = trace_func(numpy, a) - result = trace_func(dpnp, ia) - assert_array_equal(expected, result) + assert_array_equal(trace_func(dpnp, ia), trace_func(numpy, a)) @pytest.mark.parametrize("N", @@ -343,16 +246,9 @@ def test_trace(array, offset, type, dtype): @pytest.mark.parametrize("k", [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5], ids=['-5', '-4', '-3', '-2', '-1', '0', '1', '2', '3', '4', '5']) -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, float, numpy.int64, numpy.int32, numpy.int, numpy.float, int], - ids=['float64', 'float32', 'numpy.float', 'float', 'int64', 'int32', 'numpy.int', 'int']) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, 
no_complex=True)) def test_tri(N, M, k, dtype): func = lambda xp: xp.tri(N, M, k, dtype=dtype) - - if M > 0 and N > 0 and not is_dtype_supported(dtype): - assert_raises(RuntimeError, func, dpnp) - return - assert_array_equal(func(dpnp), func(numpy)) @@ -363,48 +259,50 @@ def test_tri_default_dtype(): @pytest.mark.parametrize("k", - [-6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6], - ids=['-6', '-5', '-4', '-3', '-2', '-1', '0', '1', '2', '3', '4', '5', '6']) + [-3, -2, -1, 0, 1, 2, 3, 4, 5, + numpy.array(1), dpnp.array(2), dpt.asarray(3)], + ids=['-3', '-2', '-1', '0', '1', '2', '3', '4', '5', + 'np.array(1)', 'dpnp.array(2)', 'dpt.asarray(3)']) @pytest.mark.parametrize("m", - [[0, 1, 2, 3, 4], - [1, 1, 1, 1, 1], - [[0, 0], [0, 0]], + [[[0, 0], [0, 0]], [[1, 2], [1, 2]], [[1, 2], [3, 4]], [[0, 1, 2], [3, 4, 5], [6, 7, 8]], [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]], - ids=['[0, 1, 2, 3, 4]', - '[1, 1, 1, 1, 1]', - '[[0, 0], [0, 0]]', + ids=['[[0, 0], [0, 0]]', '[[1, 2], [1, 2]]', '[[1, 2], [3, 4]]', '[[0, 1, 2], [3, 4, 5], [6, 7, 8]]', '[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]']) -def test_tril(m, k): - a = numpy.array(m) +@pytest.mark.usefixtures("allow_fall_back_on_numpy") +@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False)) +def test_tril(m, k, dtype): + a = numpy.array(m, dtype=dtype) ia = dpnp.array(a) - expected = numpy.tril(a, k) - result = dpnp.tril(ia, k) + expected = numpy.tril(a, k=k) + result = dpnp.tril(ia, k=k) assert_array_equal(expected, result) @pytest.mark.parametrize("k", - [-4, -3, -2, -1, 0, 1, 2, 3, 4], - ids=['-4', '-3', '-2', '-1', '0', '1', '2', '3', '4']) + [-3, -2, -1, 0, 1, 2, 3, 4, 5, + numpy.array(1), dpnp.array(2), dpt.asarray(3)], + ids=['-3', '-2', '-1', '0', '1', '2', '3', '4', '5', + 'np.array(1)', 'dpnp.array(2)', 'dpt.asarray(3)']) @pytest.mark.parametrize("m", - [[0, 1, 2, 3, 4], - [[1, 2], [3, 4]], + [[[1, 2], [3, 4]], [[0, 1, 2], [3, 4, 5], [6, 7, 8]], [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]], - ids=['[0, 1, 2, 3, 4]', - '[[1, 2], 
[3, 4]]', + ids=['[[1, 2], [3, 4]]', '[[0, 1, 2], [3, 4, 5], [6, 7, 8]]', '[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]']) -def test_triu(m, k): - a = numpy.array(m) +@pytest.mark.usefixtures("allow_fall_back_on_numpy") +@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False)) +def test_triu(m, k, dtype): + a = numpy.array(m, dtype=dtype) ia = dpnp.array(a) - expected = numpy.triu(a, k) - result = dpnp.triu(ia, k) + expected = numpy.triu(a, k=k) + result = dpnp.triu(ia, k=k) assert_array_equal(expected, result) @@ -414,8 +312,8 @@ def test_triu(m, k): def test_triu_size_null(k): a = numpy.ones(shape=(1, 2, 0)) ia = dpnp.array(a) - expected = numpy.triu(a, k) - result = dpnp.triu(ia, k) + expected = numpy.triu(a, k=k) + result = dpnp.triu(ia, k=k) assert_array_equal(expected, result) @@ -426,11 +324,7 @@ def test_triu_size_null(k): ids=['[1, 2, 3, 4]', '[]', '[0, 3, 5]']) -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32, - numpy.bool, numpy.complex64, numpy.complex128], - ids=['float64', 'float32', 'int64', 'int32', - 'bool', 'complex64', 'complex128']) +@pytest.mark.parametrize("dtype", get_all_dtypes()) @pytest.mark.parametrize("n", [0, 1, 4, None], ids=['0', '1', '4', 'None']) @@ -441,18 +335,8 @@ def test_vander(array, dtype, n, increase): create_array = lambda xp: xp.array(array, dtype=dtype) vander_func = lambda xp, x: xp.vander(x, N=n, increasing=increase) - if array and not is_dtype_supported(dtype): - # dtpcl intercepts RuntimeError about 'double' type and raise ValueError instead - assert_raises(ValueError, create_array, dpnp) - return - a_np = numpy.array(array, dtype=dtype) a_dpnp = dpnp.array(array, dtype=dtype) - - if array and not is_dtype_supported(dtype): - assert_raises(RuntimeError, vander_func, dpnp, a_dpnp) - return - assert_array_equal(vander_func(numpy, a_np), vander_func(dpnp, a_dpnp)) @@ -462,21 +346,12 @@ def test_vander(array, dtype, n, increase): @pytest.mark.parametrize("fill_value", [1.5, 2, 
1.5+0.j], ids=['1.5', '2', '1.5+0.j']) -@pytest.mark.parametrize("dtype", - [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32, - numpy.float16, numpy.int64, numpy.int32, numpy.bool], - ids=['None', 'complex128', 'complex64', 'float64', 'float32', - 'float16', 'int64', 'int32', 'bool']) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False)) @pytest.mark.parametrize("order", [None, "C", "F"], ids=['None', 'C', 'F']) def test_full(shape, fill_value, dtype, order): func = lambda xp: xp.full(shape, fill_value, dtype=dtype, order=order) - - if shape != 0 and not 0 in shape and not is_dtype_supported(dtype, no_complex_check=True): - assert_raises(RuntimeError, func, dpnp) - return - assert_array_equal(func(numpy), func(dpnp)) @@ -486,23 +361,15 @@ def test_full(shape, fill_value, dtype, order): @pytest.mark.parametrize("fill_value", [1.5, 2, 1.5+0.j], ids=['1.5', '2', '1.5+0.j']) -@pytest.mark.parametrize("dtype", - [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32, - numpy.float16, numpy.int64, numpy.int32, numpy.bool], - ids=['None', 'complex128', 'complex64', 'float64', 'float32', - 'float16', 'int64', 'int32', 'bool']) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False)) @pytest.mark.parametrize("order", [None, "C", "F"], ids=['None', 'C', 'F']) def test_full_like(array, fill_value, dtype, order): - a = numpy.array(array) - ia = dpnp.array(array) func = lambda xp, x: xp.full_like(x, fill_value, dtype=dtype, order=order) - if ia.size and not is_dtype_supported(dtype, no_complex_check=True): - assert_raises(RuntimeError, func, dpnp, ia) - return - + a = numpy.array(array) + ia = dpnp.array(array) assert_array_equal(func(numpy, a), func(dpnp, ia)) @@ -534,7 +401,9 @@ def test_full_strides(): assert_array_equal(dpnp.asnumpy(ia), a) -@pytest.mark.parametrize("fill_value", [[], (), dpnp.full(0, 0)], ids=['[]', '()', 'dpnp.full(0, 0)']) +@pytest.mark.parametrize("fill_value", + [[], (), dpnp.full(0, 0)], 
+ ids=['[]', '()', 'dpnp.full(0, 0)']) def test_full_invalid_fill_value(fill_value): with pytest.raises(ValueError): dpnp.full(10, fill_value=fill_value) @@ -543,120 +412,102 @@ def test_full_invalid_fill_value(fill_value): @pytest.mark.parametrize("shape", [(), 0, (0,), (2, 0, 3), (3, 2)], ids=['()', '0', '(0,)', '(2, 0, 3)', '(3, 2)']) -@pytest.mark.parametrize("dtype", - [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32, - numpy.float16, numpy.int64, numpy.int32, numpy.bool], - ids=['None', 'complex128', 'complex64', 'float64', 'float32', - 'float16', 'int64', 'int32', 'bool']) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False)) @pytest.mark.parametrize("order", [None, "C", "F"], ids=['None', 'C', 'F']) def test_zeros(shape, dtype, order): - expected = numpy.zeros(shape, dtype=dtype, order=order) - result = dpnp.zeros(shape, dtype=dtype, order=order) - - assert_array_equal(expected, result) + func = lambda xp: xp.zeros(shape, dtype=dtype, order=order) + assert_array_equal(func(numpy), func(dpnp)) @pytest.mark.parametrize("array", [[], 0, [1, 2, 3], [[1, 2], [3, 4]]], ids=['[]', '0', '[1, 2, 3]', '[[1, 2], [3, 4]]']) -@pytest.mark.parametrize("dtype", - [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32, - numpy.float16, numpy.int64, numpy.int32, numpy.bool], - ids=['None', 'complex128', 'complex64', 'float64', 'float32', - 'float16', 'int64', 'int32', 'bool']) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False)) @pytest.mark.parametrize("order", [None, "C", "F"], ids=['None', 'C', 'F']) def test_zeros_like(array, dtype, order): + func = lambda xp, x: xp.zeros_like(x, dtype=dtype, order=order) + a = numpy.array(array) ia = dpnp.array(array) - - expected = numpy.zeros_like(a, dtype=dtype, order=order) - result = dpnp.zeros_like(ia, dtype=dtype, order=order) - - assert_array_equal(expected, result) + assert_array_equal(func(numpy, a), func(dpnp, ia)) @pytest.mark.parametrize("shape", [(), 0, 
(0,), (2, 0, 3), (3, 2)], ids=['()', '0', '(0,)', '(2, 0, 3)', '(3, 2)']) -@pytest.mark.parametrize("dtype", - [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32, - numpy.float16, numpy.int64, numpy.int32, numpy.bool], - ids=['None', 'complex128', 'complex64', 'float64', 'float32', - 'float16', 'int64', 'int32', 'bool']) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False)) @pytest.mark.parametrize("order", [None, "C", "F"], ids=['None', 'C', 'F']) def test_empty(shape, dtype, order): - expected = numpy.empty(shape, dtype=dtype, order=order) - result = dpnp.empty(shape, dtype=dtype, order=order) - - assert expected.shape == result.shape + func = lambda xp: xp.empty(shape, dtype=dtype, order=order) + assert func(numpy).shape == func(dpnp).shape @pytest.mark.parametrize("array", [[], 0, [1, 2, 3], [[1, 2], [3, 4]]], ids=['[]', '0', '[1, 2, 3]', '[[1, 2], [3, 4]]']) -@pytest.mark.parametrize("dtype", - [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32, - numpy.float16, numpy.int64, numpy.int32, numpy.bool], - ids=['None', 'complex128', 'complex64', 'float64', 'float32', - 'float16', 'int64', 'int32', 'bool']) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False)) @pytest.mark.parametrize("order", [None, "C", "F"], ids=['None', 'C', 'F']) def test_empty_like(array, dtype, order): + func = lambda xp, x: xp.empty_like(x, dtype=dtype, order=order) + a = numpy.array(array) ia = dpnp.array(array) - - expected = numpy.empty_like(a, dtype=dtype, order=order) - result = dpnp.empty_like(ia, dtype=dtype, order=order) - - assert expected.shape == result.shape + assert func(numpy, a).shape == func(dpnp, ia).shape @pytest.mark.parametrize("shape", [(), 0, (0,), (2, 0, 3), (3, 2)], ids=['()', '0', '(0,)', '(2, 0, 3)', '(3, 2)']) -@pytest.mark.parametrize("dtype", - [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32, - numpy.float16, numpy.int64, numpy.int32, numpy.bool], - ids=['None', 
'complex128', 'complex64', 'float64', 'float32', - 'float16', 'int64', 'int32', 'bool']) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False)) @pytest.mark.parametrize("order", [None, "C", "F"], ids=['None', 'C', 'F']) def test_ones(shape, dtype, order): func = lambda xp: xp.ones(shape, dtype=dtype, order=order) - - if shape != 0 and not 0 in shape and not is_dtype_supported(dtype, no_complex_check=True): - assert_raises(RuntimeError, func, dpnp) - return - assert_array_equal(func(numpy), func(dpnp)) @pytest.mark.parametrize("array", [[], 0, [1, 2, 3], [[1, 2], [3, 4]]], ids=['[]', '0', '[1, 2, 3]', '[[1, 2], [3, 4]]']) -@pytest.mark.parametrize("dtype", - [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32, - numpy.float16, numpy.int64, numpy.int32, numpy.bool], - ids=['None', 'complex128', 'complex64', 'float64', 'float32', - 'float16', 'int64', 'int32', 'bool']) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False)) @pytest.mark.parametrize("order", [None, "C", "F"], ids=['None', 'C', 'F']) def test_ones_like(array, dtype, order): + func = lambda xp, x: xp.ones_like(x, dtype=dtype, order=order) + a = numpy.array(array) ia = dpnp.array(array) - func = lambda xp, x: xp.ones_like(x, dtype=dtype, order=order) + assert_array_equal(func(numpy, a), func(dpnp, ia)) - if ia.size and not is_dtype_supported(dtype, no_complex_check=True): - assert_raises(RuntimeError, func, dpnp, ia) - return - assert_array_equal(func(numpy, a), func(dpnp, ia)) +@pytest.mark.parametrize( + "func, args", + [ + pytest.param("full_like", + ['x0', '4']), + pytest.param("zeros_like", + ['x0']), + pytest.param("ones_like", + ['x0']), + pytest.param("empty_like", + ['x0']), + ]) +def test_dpctl_tensor_input(func, args): + x0 = dpt.reshape(dpt.arange(9), (3,3)) + new_args = [eval(val, {'x0' : x0}) for val in args] + X = getattr(dpt, func)(*new_args) + Y = getattr(dpnp, func)(*new_args) + if func is 'empty_like': + assert X.shape == Y.shape + else: + 
assert_array_equal(X, Y) diff --git a/tests/test_dparray.py b/tests/test_dparray.py index 6ff1672b853b..62a0120f8a33 100644 --- a/tests/test_dparray.py +++ b/tests/test_dparray.py @@ -1,15 +1,17 @@ -import dpnp -import numpy import pytest +from .helper import get_all_dtypes + +import dpnp import dpctl.tensor as dpt +import numpy +from numpy.testing import ( + assert_array_equal +) + -@pytest.mark.parametrize("res_dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool, numpy.bool_, numpy.complex], - ids=['float64', 'float32', 'int64', 'int32', 'bool', 'bool_', 'complex']) -@pytest.mark.parametrize("arr_dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool, numpy.bool_, numpy.complex], - ids=['float64', 'float32', 'int64', 'int32', 'bool', 'bool_', 'complex']) +@pytest.mark.parametrize("res_dtype", get_all_dtypes()) +@pytest.mark.parametrize("arr_dtype", get_all_dtypes()) @pytest.mark.parametrize("arr", [[-2, -1, 0, 1, 2], [[-2, -1], [1, 2]], []], ids=['[-2, -1, 0, 1, 2]', '[[-2, -1], [1, 2]]', '[]']) @@ -18,12 +20,10 @@ def test_astype(arr, arr_dtype, res_dtype): dpnp_array = dpnp.array(numpy_array) expected = numpy_array.astype(res_dtype) result = dpnp_array.astype(res_dtype) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) -@pytest.mark.parametrize("arr_dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool, numpy.bool_, numpy.complex], - ids=['float64', 'float32', 'int64', 'int32', 'bool', 'bool_', 'complex']) +@pytest.mark.parametrize("arr_dtype", get_all_dtypes()) @pytest.mark.parametrize("arr", [[-2, -1, 0, 1, 2], [[-2, -1], [1, 2]], []], ids=['[-2, -1, 0, 1, 2]', '[[-2, -1], [1, 2]]', '[]']) @@ -32,7 +32,7 @@ def test_flatten(arr, arr_dtype): dpnp_array = dpnp.array(arr, dtype=arr_dtype) expected = numpy_array.flatten() result = dpnp_array.flatten() - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) 
@pytest.mark.parametrize("shape", @@ -51,7 +51,7 @@ def test_flags(shape, order): @pytest.mark.parametrize("dtype", - [numpy.complex64, numpy.float32, numpy.int64, numpy.int32, numpy.bool], + [numpy.complex64, numpy.float32, numpy.int64, numpy.int32, numpy.bool_], ids=['complex64', 'float32', 'int64', 'int32', 'bool']) @pytest.mark.parametrize("strides", [(1, 4) , (4, 1)], @@ -68,3 +68,134 @@ def test_flags_strides(dtype, order, strides): assert usm_array.flags == dpnp_array.flags assert numpy_array.flags.c_contiguous == dpnp_array.flags.c_contiguous assert numpy_array.flags.f_contiguous == dpnp_array.flags.f_contiguous + +def test_print_dpnp_int(): + result = repr(dpnp.array([1, 0, 2, -3, -1, 2, 21, -9], dtype='i4')) + expected = "array([ 1, 0, 2, -3, -1, 2, 21, -9], dtype=int32)" + assert(result==expected) + + result = str(dpnp.array([1, 0, 2, -3, -1, 2, 21, -9], dtype='i4')) + expected = "[ 1 0 2 -3 -1 2 21 -9]" + assert(result==expected) +# int32 + result = repr(dpnp.array([1, -1, 21], dtype=dpnp.int32)) + expected = "array([ 1, -1, 21], dtype=int32)" + assert(result==expected) + + result = str(dpnp.array([1, -1, 21], dtype=dpnp.int32)) + expected = "[ 1 -1 21]" + assert(result==expected) +# uint8 + result = repr(dpnp.array([1, 0, 3], dtype=numpy.uint8)) + expected = "array([1, 0, 3], dtype=uint8)" + assert(result==expected) + + result = str(dpnp.array([1, 0, 3], dtype=numpy.uint8)) + expected = "[1 0 3]" + assert(result==expected) + +def test_print_dpnp_float(): + result = repr(dpnp.array([1, -1, 21], dtype=float)) + expected = "array([ 1., -1., 21.])" + assert(result==expected) + + result = str(dpnp.array([1, -1, 21], dtype=float)) + expected = "[ 1. -1. 21.]" + assert(result==expected) +# float32 + result = repr(dpnp.array([1, -1, 21], dtype=dpnp.float32)) + expected = "array([ 1., -1., 21.], dtype=float32)" + assert(result==expected) + + result = str(dpnp.array([1, -1, 21], dtype=dpnp.float32)) + expected = "[ 1. -1. 
21.]" + assert(result==expected) + +def test_print_dpnp_complex(): + result = repr(dpnp.array([1, -1, 21], dtype=complex)) + expected = "array([ 1.+0.j, -1.+0.j, 21.+0.j])" + assert(result==expected) + + result = str(dpnp.array([1, -1, 21], dtype=complex)) + expected = "[ 1.+0.j -1.+0.j 21.+0.j]" + assert(result==expected) + +def test_print_dpnp_boolean(): + result = repr(dpnp.array([1, 0, 3], dtype=bool)) + expected = "array([ True, False, True])" + assert(result==expected) + + result = str(dpnp.array([1, 0, 3], dtype=bool)) + expected = "[ True False True]" + assert(result==expected) + +def test_print_dpnp_special_character(): +# NaN + result = repr(dpnp.array([1., 0., dpnp.nan, 3.])) + expected = "array([ 1., 0., nan, 3.])" + assert(result==expected) + + result = str(dpnp.array([1., 0., dpnp.nan, 3.])) + expected = "[ 1. 0. nan 3.]" + assert(result==expected) +# inf + result = repr(dpnp.array([1., 0., numpy.inf, 3.])) + expected = "array([ 1., 0., inf, 3.])" + assert(result==expected) + + result = str(dpnp.array([1., 0., numpy.inf, 3.])) + expected = "[ 1. 0. inf 3.]" + assert(result==expected) + +def test_print_dpnp_nd(): +# 1D + result = repr(dpnp.arange(10000, dtype='float32')) + expected = "array([0.000e+00, 1.000e+00, 2.000e+00, ..., 9.997e+03, 9.998e+03,\n 9.999e+03], dtype=float32)" + assert(result==expected) + + result = str(dpnp.arange(10000, dtype='float32')) + expected = "[0.000e+00 1.000e+00 2.000e+00 ... 
9.997e+03 9.998e+03 9.999e+03]" + assert(result==expected) + +# 2D + result = repr(dpnp.array([[1, 2], [3, 4]], dtype=float)) + expected = "array([[1., 2.],\n [3., 4.]])" + assert(result==expected) + + result = str(dpnp.array([[1, 2], [3, 4]])) + expected = "[[1 2]\n [3 4]]" + assert(result==expected) + +# 0 shape + result = repr(dpnp.empty( shape=(0, 0) )) + expected = "array([])" + assert(result==expected) + + result = str(dpnp.empty( shape=(0, 0) )) + expected = "[]" + assert(result==expected) + +@pytest.mark.parametrize("func", [bool, float, int, complex]) +@pytest.mark.parametrize("shape", [tuple(), (1,), (1, 1), (1, 1, 1)]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False, no_complex=True)) +def test_scalar_type_casting(func, shape, dtype): + numpy_array = numpy.full(shape, 5, dtype=dtype) + dpnp_array = dpnp.full(shape, 5, dtype=dtype) + assert func(numpy_array) == func(dpnp_array) + + +@pytest.mark.parametrize("method", ["__bool__", "__float__", "__int__", "__complex__"]) +@pytest.mark.parametrize("shape", [tuple(), (1,), (1, 1), (1, 1, 1)]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False, no_complex=True, no_none=True)) +def test_scalar_type_casting_by_method(method, shape, dtype): + numpy_array = numpy.full(shape, 4.7, dtype=dtype) + dpnp_array = dpnp.full(shape, 4.7, dtype=dtype) + assert getattr(numpy_array, method)() == getattr(dpnp_array, method)() + + +@pytest.mark.parametrize("shape", [(1,), (1, 1), (1, 1, 1)]) +@pytest.mark.parametrize("index_dtype", [dpnp.int32, dpnp.int64]) +def test_array_as_index(shape, index_dtype): + ind_arr = dpnp.ones(shape, dtype=index_dtype) + a = numpy.arange(ind_arr.size + 1) + assert a[tuple(ind_arr)] == a[1] diff --git a/tests/test_indexing.py b/tests/test_indexing.py index 091cf1345c4e..1a40777afac8 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -3,6 +3,9 @@ import dpnp import numpy +from numpy.testing import ( + assert_array_equal +) 
@pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -16,7 +19,7 @@ def test_choose(): expected = numpy.choose([0, 0, 0, 0], [a, b, c]) result = dpnp.choose([0, 0, 0, 0], [ia, ib, ic]) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("offset", @@ -47,7 +50,7 @@ def test_diagonal(array, offset): ia = dpnp.array(a) expected = numpy.diagonal(a, offset) result = dpnp.diagonal(ia, offset) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("val", @@ -71,7 +74,7 @@ def test_fill_diagonal(array, val): ia = dpnp.array(a) expected = numpy.fill_diagonal(a, val) result = dpnp.fill_diagonal(ia, val) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("dimension", @@ -81,7 +84,7 @@ def test_fill_diagonal(array, val): def test_indices(dimension): expected = numpy.indices(dimension) result = dpnp.indices(dimension) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("array", @@ -107,7 +110,7 @@ def test_nonzero(array): ia = dpnp.array(array) expected = numpy.nonzero(a) result = dpnp.nonzero(ia) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -137,7 +140,7 @@ def test_place1(arr, mask, vals): im = dpnp.array(m) numpy.place(a, m, vals) dpnp.place(ia, im, vals) - numpy.testing.assert_array_equal(a, ia) + assert_array_equal(a, ia) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -161,7 +164,7 @@ def test_place2(arr, mask, vals): im = dpnp.array(m) numpy.place(a, m, vals) dpnp.place(ia, im, vals) - numpy.testing.assert_array_equal(a, ia) + assert_array_equal(a, ia) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -186,7 +189,7 @@ def test_place3(arr, mask, vals): im = dpnp.array(m) numpy.place(a, m, vals) 
dpnp.place(ia, im, vals) - numpy.testing.assert_array_equal(a, ia) + assert_array_equal(a, ia) @pytest.mark.parametrize("v", @@ -211,7 +214,7 @@ def test_put(array, ind, v): ia = dpnp.array(a) numpy.put(a, ind, v) dpnp.put(ia, ind, v) - numpy.testing.assert_array_equal(a, ia) + assert_array_equal(a, ia) @pytest.mark.parametrize("v", @@ -236,7 +239,7 @@ def test_put2(array, ind, v): ia = dpnp.array(a) numpy.put(a, ind, v) dpnp.put(ia, ind, v) - numpy.testing.assert_array_equal(a, ia) + assert_array_equal(a, ia) def test_put3(): @@ -244,7 +247,7 @@ def test_put3(): ia = dpnp.array(a) dpnp.put(ia, [0, 2], [-44, -55]) numpy.put(a, [0, 2], [-44, -55]) - numpy.testing.assert_array_equal(a, ia) + assert_array_equal(a, ia) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -256,7 +259,7 @@ def test_put_along_axis_val_int(): for axis in range(2): numpy.put_along_axis(a, ind_r, 777, axis) dpnp.put_along_axis(ai, ind_r_i, 777, axis) - numpy.testing.assert_array_equal(a, ai) + assert_array_equal(a, ai) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -268,7 +271,7 @@ def test_put_along_axis1(): for axis in range(3): numpy.put_along_axis(a, ind_r, 777, axis) dpnp.put_along_axis(ai, ind_r_i, 777, axis) - numpy.testing.assert_array_equal(a, ai) + assert_array_equal(a, ai) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -280,7 +283,7 @@ def test_put_along_axis2(): for axis in range(3): numpy.put_along_axis(a, ind_r, [100, 200, 300, 400], axis) dpnp.put_along_axis(ai, ind_r_i, [100, 200, 300, 400], axis) - numpy.testing.assert_array_equal(a, ai) + assert_array_equal(a, ai) @pytest.mark.parametrize("vals", @@ -309,7 +312,7 @@ def test_putmask1(arr, mask, vals): iv = dpnp.array(v) numpy.putmask(a, m, v) dpnp.putmask(ia, im, iv) - numpy.testing.assert_array_equal(a, ia) + assert_array_equal(a, ia) @pytest.mark.parametrize("vals", @@ -334,7 +337,7 @@ def test_putmask2(arr, mask, vals): iv = dpnp.array(v) numpy.putmask(a, m, v) dpnp.putmask(ia, im, iv) - 
numpy.testing.assert_array_equal(a, ia) + assert_array_equal(a, ia) @pytest.mark.parametrize("vals", @@ -360,7 +363,7 @@ def test_putmask3(arr, mask, vals): iv = dpnp.array(v) numpy.putmask(a, m, v) dpnp.putmask(ia, im, iv) - numpy.testing.assert_array_equal(a, ia) + assert_array_equal(a, ia) def test_select(): @@ -378,7 +381,7 @@ def test_select(): ichoicelist = [ichoice_val1, ichoice_val2] expected = numpy.select(condlist, choicelist) result = dpnp.select(icondlist, ichoicelist) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("array_type", @@ -415,10 +418,9 @@ def test_take(array, indices, array_type, indices_type): iind = dpnp.array(ind) expected = numpy.take(a, ind) result = dpnp.take(ia, iind) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_take_along_axis(): a = numpy.arange(16).reshape(4, 4) ai = dpnp.array(a) @@ -427,10 +429,9 @@ def test_take_along_axis(): for axis in range(2): expected = numpy.take_along_axis(a, ind_r, axis) result = dpnp.take_along_axis(ai, ind_r_i, axis) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_take_along_axis1(): a = numpy.arange(64).reshape(4, 4, 4) ai = dpnp.array(a) @@ -439,7 +440,7 @@ def test_take_along_axis1(): for axis in range(3): expected = numpy.take_along_axis(a, ind_r, axis) result = dpnp.take_along_axis(ai, ind_r_i, axis) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("m", @@ -454,7 +455,7 @@ def test_take_along_axis1(): def test_tril_indices(n, k, m): result = dpnp.tril_indices(n, k, m) expected = numpy.tril_indices(n, k, m) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("k", @@ -472,7 +473,7 @@ def 
test_tril_indices_from(array, k): ia = dpnp.array(a) result = dpnp.tril_indices_from(ia, k) expected = numpy.tril_indices_from(a, k) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("m", @@ -487,7 +488,7 @@ def test_tril_indices_from(array, k): def test_triu_indices(n, k, m): result = dpnp.triu_indices(n, k, m) expected = numpy.triu_indices(n, k, m) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("k", @@ -505,4 +506,4 @@ def test_triu_indices_from(array, k): ia = dpnp.array(a) result = dpnp.triu_indices_from(ia, k) expected = numpy.triu_indices_from(a, k) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) diff --git a/tests/test_linalg.py b/tests/test_linalg.py index dd89a18adbd6..d9784a41558f 100644 --- a/tests/test_linalg.py +++ b/tests/test_linalg.py @@ -1,9 +1,15 @@ import pytest +from .helper import get_all_dtypes import dpnp as inp import dpctl + import numpy +from numpy.testing import ( + assert_allclose, + assert_array_equal +) def vvsort(val, vec, size, xp): @@ -49,7 +55,7 @@ def test_cholesky(array): ia = inp.array(a) result = inp.linalg.cholesky(ia) expected = numpy.linalg.cholesky(a) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("arr", @@ -63,7 +69,7 @@ def test_cond(arr, p): ia = inp.array(a) result = inp.linalg.cond(ia, p) expected = numpy.linalg.cond(a, p) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("array", @@ -82,13 +88,11 @@ def test_det(array): ia = inp.array(a) result = inp.linalg.det(ia) expected = numpy.linalg.det(a) - numpy.testing.assert_allclose(expected, result) + assert_allclose(expected, result) @pytest.mark.usefixtures("allow_fall_back_on_numpy") -@pytest.mark.parametrize("type", - [numpy.float64, numpy.float32, 
numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("size", [2, 4, 8, 16, 300]) def test_eig_arange(type, size): @@ -115,21 +119,19 @@ def test_eig_arange(type, size): if np_vec[0, i] * dpnp_vec[0, i] < 0: np_vec[:, i] = -np_vec[:, i] - numpy.testing.assert_array_equal(symm_orig, symm) - numpy.testing.assert_array_equal(dpnp_symm_orig, dpnp_symm) + assert_array_equal(symm_orig, symm) + assert_array_equal(dpnp_symm_orig, dpnp_symm) assert (dpnp_val.dtype == np_val.dtype) assert (dpnp_vec.dtype == np_vec.dtype) assert (dpnp_val.shape == np_val.shape) assert (dpnp_vec.shape == np_vec.shape) - numpy.testing.assert_allclose(dpnp_val, np_val, rtol=1e-05, atol=1e-05) - numpy.testing.assert_allclose(dpnp_vec, np_vec, rtol=1e-05, atol=1e-05) + assert_allclose(dpnp_val, np_val, rtol=1e-05, atol=1e-05) + assert_allclose(dpnp_vec, np_vec, rtol=1e-05, atol=1e-05) -@pytest.mark.parametrize("type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True)) def test_eigvals(type): if dpctl.get_current_device_type() != dpctl.device_type.gpu: pytest.skip("eigvals function doesn\'t work on CPU: https://github.com/IntelPython/dpnp/issues/1005") @@ -144,12 +146,10 @@ def test_eigvals(type): ia = inp.array(a) result = inp.linalg.eigvals(ia) expected = numpy.linalg.eigvals(a) - numpy.testing.assert_allclose(expected, result, atol=0.5) + assert_allclose(expected, result, atol=0.5) -@pytest.mark.parametrize("type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("array", [[[1., 2.], [3., 4.]], [[0, 1, 2], [3, 2, -1], [4, -2, 3]]], ids=['[[1., 2.], [3., 4.]]', '[[0, 1, 2], 
[3, 2, -1], [4, -2, 3]]']) @@ -158,12 +158,10 @@ def test_inv(type, array): ia = inp.array(a) result = inp.linalg.inv(ia) expected = numpy.linalg.inv(a) - numpy.testing.assert_allclose(expected, result) + assert_allclose(expected, result) -@pytest.mark.parametrize("type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True, no_none=True)) @pytest.mark.parametrize("array", [[0, 0], [0, 1], [1, 2], [[0, 0], [0, 0]], [[1, 2], [1, 2]], [[1, 2], [3, 4]]], ids=['[0, 0]', '[0, 1]', '[1, 2]', '[[0, 0], [0, 0]]', '[[1, 2], [1, 2]]', '[[1, 2], [3, 4]]']) @@ -177,10 +175,11 @@ def test_matrix_rank(type, tol, array): result = inp.linalg.matrix_rank(ia, tol=tol) expected = numpy.linalg.matrix_rank(a, tol=tol) - numpy.testing.assert_allclose(expected, result) + assert_allclose(expected, result) @pytest.mark.usefixtures("allow_fall_back_on_numpy") +@pytest.mark.usefixtures("suppress_divide_numpy_warnings") @pytest.mark.parametrize("array", [[7], [1, 2], [1, 0]], ids=['[7]', '[1, 2]', '[1, 0]']) @@ -195,7 +194,7 @@ def test_norm1(array, ord, axis): ia = inp.array(a) result = inp.linalg.norm(ia, ord=ord, axis=axis) expected = numpy.linalg.norm(a, ord=ord, axis=axis) - numpy.testing.assert_allclose(expected, result) + assert_allclose(expected, result) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -213,7 +212,7 @@ def test_norm2(array, ord, axis): ia = inp.array(a) result = inp.linalg.norm(ia, ord=ord, axis=axis) expected = numpy.linalg.norm(a, ord=ord, axis=axis) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -231,13 +230,11 @@ def test_norm3(array, ord, axis): ia = inp.array(a) result = inp.linalg.norm(ia, ord=ord, axis=axis) expected = numpy.linalg.norm(a, ord=ord, axis=axis) - numpy.testing.assert_array_equal(expected, result) + 
assert_array_equal(expected, result) @pytest.mark.usefixtures("allow_fall_back_on_numpy") -@pytest.mark.parametrize("type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("shape", [(2, 2), (3, 4), (5, 3), (16, 16)], ids=['(2,2)', '(3,4)', '(5,3)', '(16,16)']) @@ -262,7 +259,7 @@ def test_qr(type, shape, mode): tol = 1e-11 # check decomposition - numpy.testing.assert_allclose(ia, numpy.dot(inp.asnumpy(dpnp_q), inp.asnumpy(dpnp_r)), rtol=tol, atol=tol) + assert_allclose(ia, numpy.dot(inp.asnumpy(dpnp_q), inp.asnumpy(dpnp_r)), rtol=tol, atol=tol) # NP change sign for comparison ncols = min(a.shape[0], a.shape[1]) @@ -273,15 +270,12 @@ def test_qr(type, shape, mode): np_r[i, :] = -np_r[i, :] if numpy.any(numpy.abs(np_r[i, :]) > tol): - numpy.testing.assert_allclose(inp.asnumpy(dpnp_q)[:, i], np_q[:, i], rtol=tol, atol=tol) + assert_allclose(inp.asnumpy(dpnp_q)[:, i], np_q[:, i], rtol=tol, atol=tol) - numpy.testing.assert_allclose(dpnp_r, np_r, rtol=tol, atol=tol) + assert_allclose(dpnp_r, np_r, rtol=tol, atol=tol) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") -@pytest.mark.parametrize("type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("shape", [(2, 2), (3, 4), (5, 3), (16, 16)], ids=['(2,2)', '(3,4)', '(5,3)', '(16,16)']) @@ -310,10 +304,10 @@ def test_svd(type, shape): dpnp_diag_s[i, i] = dpnp_s[i] # check decomposition - numpy.testing.assert_allclose(ia, inp.dot(dpnp_u, inp.dot(dpnp_diag_s, dpnp_vt)), rtol=tol, atol=tol) + assert_allclose(ia, inp.dot(dpnp_u, inp.dot(dpnp_diag_s, dpnp_vt)), rtol=tol, atol=tol) # compare singular values - # numpy.testing.assert_allclose(dpnp_s, np_s, rtol=tol, atol=tol) + # 
assert_allclose(dpnp_s, np_s, rtol=tol, atol=tol) # change sign of vectors for i in range(min(shape[0], shape[1])): @@ -323,5 +317,5 @@ def test_svd(type, shape): # compare vectors for non-zero values for i in range(numpy.count_nonzero(np_s > tol)): - numpy.testing.assert_allclose(inp.asnumpy(dpnp_u)[:, i], np_u[:, i], rtol=tol, atol=tol) - numpy.testing.assert_allclose(inp.asnumpy(dpnp_vt)[i, :], np_vt[i, :], rtol=tol, atol=tol) + assert_allclose(inp.asnumpy(dpnp_u)[:, i], np_u[:, i], rtol=tol, atol=tol) + assert_allclose(inp.asnumpy(dpnp_vt)[i, :], np_vt[i, :], rtol=tol, atol=tol) diff --git a/tests/test_logic.py b/tests/test_logic.py index b3280be07618..425106fd2efe 100644 --- a/tests/test_logic.py +++ b/tests/test_logic.py @@ -1,13 +1,16 @@ import pytest +from .helper import get_all_dtypes import dpnp import numpy +from numpy.testing import ( + assert_allclose, + assert_equal +) -@pytest.mark.parametrize("type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool, numpy.bool_], - ids=['float64', 'float32', 'int64', 'int32', 'bool', 'bool_']) +@pytest.mark.parametrize("type", get_all_dtypes(no_complex=True)) @pytest.mark.parametrize("shape", [(0,), (4,), (2, 3), (2, 2, 2)], ids=['(0,)', '(4,)', '(2,3)', '(2,2,2)']) @@ -31,16 +34,14 @@ def test_all(type, shape): np_res = numpy.all(a) dpnp_res = dpnp.all(ia) - numpy.testing.assert_allclose(dpnp_res, np_res) + assert_allclose(dpnp_res, np_res) np_res = a.all() dpnp_res = ia.all() - numpy.testing.assert_allclose(dpnp_res, np_res) + assert_allclose(dpnp_res, np_res) -@pytest.mark.parametrize("type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True)) def test_allclose(type): a = numpy.random.rand(10) @@ -51,7 +52,7 @@ def test_allclose(type): np_res = numpy.allclose(a, b) dpnp_res = dpnp.allclose(dpnp_a, dpnp_b) - numpy.testing.assert_allclose(dpnp_res, np_res) + 
assert_allclose(dpnp_res, np_res) a[0] = numpy.inf @@ -59,12 +60,10 @@ def test_allclose(type): np_res = numpy.allclose(a, b) dpnp_res = dpnp.allclose(dpnp_a, dpnp_b) - numpy.testing.assert_allclose(dpnp_res, np_res) + assert_allclose(dpnp_res, np_res) -@pytest.mark.parametrize("type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool, numpy.bool_], - ids=['float64', 'float32', 'int64', 'int32', 'bool', 'bool_']) +@pytest.mark.parametrize("type", get_all_dtypes(no_complex=True)) @pytest.mark.parametrize("shape", [(0,), (4,), (2, 3), (2, 2, 2)], ids=['(0,)', '(4,)', '(2,3)', '(2,2,2)']) @@ -88,58 +87,156 @@ def test_any(type, shape): np_res = numpy.any(a) dpnp_res = dpnp.any(ia) - numpy.testing.assert_allclose(dpnp_res, np_res) + assert_allclose(dpnp_res, np_res) np_res = a.any() dpnp_res = ia.any() - numpy.testing.assert_allclose(dpnp_res, np_res) + assert_allclose(dpnp_res, np_res) + + +def test_equal(): + a = numpy.array([1, 2, 3, 4, 5, 6, 7, 8]) + ia = dpnp.array(a) + for i in range(len(a)): + np_res = (a == i) + dpnp_res = (ia == i) + assert_equal(dpnp_res, np_res) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_greater(): a = numpy.array([1, 2, 3, 4, 5, 6, 7, 8]) ia = dpnp.array(a) for i in range(len(a) + 1): np_res = (a > i) dpnp_res = (ia > i) - numpy.testing.assert_equal(dpnp_res, np_res) + assert_equal(dpnp_res, np_res) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_greater_equal(): a = numpy.array([1, 2, 3, 4, 5, 6, 7, 8]) ia = dpnp.array(a) for i in range(len(a) + 1): np_res = (a >= i) dpnp_res = (ia >= i) - numpy.testing.assert_equal(dpnp_res, np_res) + assert_equal(dpnp_res, np_res) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_less(): a = numpy.array([1, 2, 3, 4, 5, 6, 7, 8]) ia = dpnp.array(a) for i in range(len(a) + 1): np_res = (a < i) dpnp_res = (ia < i) - numpy.testing.assert_equal(dpnp_res, np_res) + assert_equal(dpnp_res, np_res) 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_less_equal(): a = numpy.array([1, 2, 3, 4, 5, 6, 7, 8]) ia = dpnp.array(a) for i in range(len(a) + 1): np_res = (a <= i) dpnp_res = (ia <= i) - numpy.testing.assert_equal(dpnp_res, np_res) + assert_equal(dpnp_res, np_res) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_not_equal(): a = numpy.array([1, 2, 3, 4, 5, 6, 7, 8]) ia = dpnp.array(a) for i in range(len(a)): np_res = (a != i) dpnp_res = (ia != i) - numpy.testing.assert_equal(dpnp_res, np_res) + assert_equal(dpnp_res, np_res) + + +@pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True)) +@pytest.mark.parametrize("op", + ['logical_and', 'logical_or', 'logical_xor'], + ids=['logical_and', 'logical_or', 'logical_xor']) +def test_logic_comparison(op, dtype): + a = numpy.array([0, 0, 3, 2], dtype=dtype) + b = numpy.array([0, 4, 0, 2], dtype=dtype) + + # x1 OP x2 + np_res = getattr(numpy, op)(a, b) + dpnp_res = getattr(dpnp, op)(dpnp.array(a), dpnp.array(b)) + assert_equal(dpnp_res, np_res) + + # x2 OP x1 + np_res = getattr(numpy, op)(b, a) + dpnp_res = getattr(dpnp, op)(dpnp.array(b), dpnp.array(a)) + assert_equal(dpnp_res, np_res) + + # numpy.tile(x1, (10,)) OP numpy.tile(x2, (10,)) + a, b = numpy.tile(a, (10,)), numpy.tile(b, (10,)) + np_res = getattr(numpy, op)(a, b) + dpnp_res = getattr(dpnp, op)(dpnp.array(a), dpnp.array(b)) + assert_equal(dpnp_res, np_res) + + # numpy.tile(x2, (10, 2)) OP numpy.tile(x1, (10, 2)) + a, b = numpy.tile(a, (10, 1)), numpy.tile(b, (10, 1)) + np_res = getattr(numpy, op)(b, a) + dpnp_res = getattr(dpnp, op)(dpnp.array(b), dpnp.array(a)) + assert_equal(dpnp_res, np_res) + + +@pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True)) +def test_logical_not(dtype): + a = dpnp.array([0, 4, 0, 2], dtype=dtype) + + np_res = numpy.logical_not(a.asnumpy()) + dpnp_res = dpnp.logical_not(a) + assert_equal(dpnp_res, np_res) + + +@pytest.mark.parametrize("op", + ['equal', 'greater', 'greater_equal', 
'less', 'less_equal', + 'logical_and', 'logical_or', 'logical_xor', 'not_equal'], + ids=['equal', 'greater', 'greater_equal', 'less', 'less_equal', + 'logical_and', 'logical_or', 'logical_xor', 'not_equal']) +@pytest.mark.parametrize("x1", + [[3, 4, 5, 6], [[1, 2, 3, 4], [5, 6, 7, 8]], [[1, 2, 5, 6], [3, 4, 7, 8], [1, 2, 7, 8]]], + ids=['[3, 4, 5, 6]', '[[1, 2, 3, 4], [5, 6, 7, 8]]', '[[1, 2, 5, 6], [3, 4, 7, 8], [1, 2, 7, 8]]']) +@pytest.mark.parametrize("x2", + [5, [1, 2, 5, 6]], + ids=['5', '[1, 2, 5, 6]']) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True)) +def test_elemwise_comparison(op, x1, x2, dtype): + create_func = lambda xp, a: xp.asarray(a, dtype=dtype) if not numpy.isscalar(a) else numpy.dtype(dtype=dtype).type(a) + + np_x1, np_x2 = create_func(numpy, x1), create_func(numpy, x2) + dp_x1, dp_x2 = create_func(dpnp, np_x1), create_func(dpnp, np_x2) + + # x1 OP x2 + np_res = getattr(numpy, op)(np_x1, np_x2) + dpnp_res = getattr(dpnp, op)(dp_x1, dp_x2) + assert_equal(dpnp_res, np_res) + + # x2 OP x1 + np_res = getattr(numpy, op)(np_x2, np_x1) + dpnp_res = getattr(dpnp, op)(dp_x2, dp_x1) + assert_equal(dpnp_res, np_res) + + # x1[::-1] OP x2 + np_res = getattr(numpy, op)(np_x1[::-1], np_x2) + dpnp_res = getattr(dpnp, op)(dp_x1[::-1], dp_x2) + assert_equal(dpnp_res, np_res) + + +@pytest.mark.parametrize("op", + ['equal', 'greater', 'greater_equal', 'less', 'less_equal', + 'logical_and', 'logical_or', 'logical_xor', 'not_equal'], + ids=['equal', 'greater', 'greater_equal', 'less', 'less_equal', + 'logical_and', 'logical_or', 'logical_xor', 'not_equal']) +@pytest.mark.parametrize("sh1", + [[10], [8, 4], [4, 1, 2]], + ids=['(10,)', '(8, 4)', '(4, 1, 2)']) +@pytest.mark.parametrize("sh2", + [[12], [4, 8], [1, 8, 6]], + ids=['(12,)', '(4, 8)', '(1, 8, 6)']) +def test_comparison_no_broadcast_with_shapes(op, sh1, sh2): + x1, x2 = dpnp.random.randn(*sh1), dpnp.random.randn(*sh2) + + # x1 OP x2 + with pytest.raises(ValueError): + getattr(dpnp, op)(x1, 
x2) + getattr(numpy, op)(x1.asnumpy(), x2.asnumpy()) diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py index 21071bec41e9..78f628908337 100644 --- a/tests/test_mathematical.py +++ b/tests/test_mathematical.py @@ -1,8 +1,15 @@ import pytest +from .helper import get_all_dtypes import dpnp import numpy +from numpy.testing import ( + assert_allclose, + assert_array_almost_equal, + assert_array_equal, + assert_raises +) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -10,27 +17,27 @@ class TestConvolve: def test_object(self): d = [1.] * 100 k = [1.] * 3 - numpy.testing.assert_array_almost_equal(dpnp.convolve(d, k)[2:-2], dpnp.full(98, 3)) + assert_array_almost_equal(dpnp.convolve(d, k)[2:-2], dpnp.full(98, 3)) def test_no_overwrite(self): d = dpnp.ones(100) k = dpnp.ones(3) dpnp.convolve(d, k) - numpy.testing.assert_array_equal(d, dpnp.ones(100)) - numpy.testing.assert_array_equal(k, dpnp.ones(3)) + assert_array_equal(d, dpnp.ones(100)) + assert_array_equal(k, dpnp.ones(3)) def test_mode(self): d = dpnp.ones(100) k = dpnp.ones(3) default_mode = dpnp.convolve(d, k, mode='full') full_mode = dpnp.convolve(d, k, mode='f') - numpy.testing.assert_array_equal(full_mode, default_mode) + assert_array_equal(full_mode, default_mode) # integer mode - with numpy.testing.assert_raises(ValueError): + with assert_raises(ValueError): dpnp.convolve(d, k, mode=-1) - numpy.testing.assert_array_equal(dpnp.convolve(d, k, mode=2), full_mode) + assert_array_equal(dpnp.convolve(d, k, mode=2), full_mode) # illegal arguments - with numpy.testing.assert_raises(TypeError): + with assert_raises(TypeError): dpnp.convolve(d, k, mode=None) @@ -53,33 +60,39 @@ def test_diff(array): dpnp_a = dpnp.array(array) expected = numpy.diff(np_a) result = dpnp.diff(dpnp_a) - numpy.testing.assert_allclose(expected, result) + assert_allclose(expected, result) -@pytest.mark.parametrize("dtype1", - [numpy.bool_, numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.complex64, 
numpy.complex128], - ids=['numpy.bool_', 'numpy.float64', 'numpy.float32', 'numpy.int64', 'numpy.int32', 'numpy.complex64', 'numpy.complex128']) -@pytest.mark.parametrize("dtype2", - [numpy.bool_, numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.complex64, numpy.complex128], - ids=['numpy.bool_', 'numpy.float64', 'numpy.float32', 'numpy.int64', 'numpy.int32', 'numpy.complex64', 'numpy.complex128']) +@pytest.mark.parametrize("dtype1", get_all_dtypes()) +@pytest.mark.parametrize("dtype2", get_all_dtypes()) +@pytest.mark.parametrize("func", + ['add', 'multiply', 'subtract', 'divide']) @pytest.mark.parametrize("data", [[[1, 2], [3, 4]]], ids=['[[1, 2], [3, 4]]']) -def test_multiply_dtype(dtype1, dtype2, data): +def test_op_multiple_dtypes(dtype1, func, dtype2, data): np_a = numpy.array(data, dtype=dtype1) dpnp_a = dpnp.array(data, dtype=dtype1) np_b = numpy.array(data, dtype=dtype2) dpnp_b = dpnp.array(data, dtype=dtype2) - result = dpnp.multiply(dpnp_a, dpnp_b) - expected = numpy.multiply(np_a, np_b) - numpy.testing.assert_array_equal(result, expected) + if func == 'subtract' and (dtype1 == dtype2 == dpnp.bool): + with pytest.raises(TypeError): + result = getattr(dpnp, func)(dpnp_a, dpnp_b) + expected = getattr(numpy, func)(np_a, np_b) + else: + result = getattr(dpnp, func)(dpnp_a, dpnp_b) + expected = getattr(numpy, func)(np_a, np_b) + assert_array_equal(result, expected) @pytest.mark.parametrize("rhs", [[[1, 2, 3], [4, 5, 6]], [2.0, 1.5, 1.0], 3, 0.3]) -@pytest.mark.parametrize("lhs", [[[6, 5, 4], [3, 2, 1]], [1.3, 2.6, 3.9], 5, 0.5]) -@pytest.mark.parametrize("dtype", [numpy.int32, numpy.int64, numpy.float32, numpy.float64]) +@pytest.mark.parametrize("lhs", [[[6, 5, 4], [3, 2, 1]], [1.3, 2.6, 3.9]]) +# TODO: achieve the same level of dtype support for all mathematical operations, like +# @pytest.mark.parametrize("dtype", get_all_dtypes()) +# and to get rid of fallbacks on numpy allowed by below fixture +# 
@pytest.mark.usefixtures("allow_fall_back_on_numpy") class TestMathematical: @staticmethod @@ -90,74 +103,90 @@ def array_or_scalar(xp, data, dtype=None): return xp.array(data, dtype=dtype) def _test_mathematical(self, name, dtype, lhs, rhs): - a = self.array_or_scalar(dpnp, lhs, dtype=dtype) - b = self.array_or_scalar(dpnp, rhs, dtype=dtype) - result = getattr(dpnp, name)(a, b) - - a = self.array_or_scalar(numpy, lhs, dtype=dtype) - b = self.array_or_scalar(numpy, rhs, dtype=dtype) - expected = getattr(numpy, name)(a, b) - - numpy.testing.assert_allclose(result, expected, atol=1e-4) - - @pytest.mark.usefixtures("allow_fall_back_on_numpy") + a_dpnp = self.array_or_scalar(dpnp, lhs, dtype=dtype) + b_dpnp = self.array_or_scalar(dpnp, rhs, dtype=dtype) + + a_np = self.array_or_scalar(numpy, lhs, dtype=dtype) + b_np = self.array_or_scalar(numpy, rhs, dtype=dtype) + + if name == 'subtract' and not numpy.isscalar(rhs) and dtype == dpnp.bool: + with pytest.raises(TypeError): + result = getattr(dpnp, name)(a_dpnp, b_dpnp) + expected = getattr(numpy, name)(a_np, b_np) + else: + result = getattr(dpnp, name)(a_dpnp, b_dpnp) + expected = getattr(numpy, name)(a_np, b_np) + assert_allclose(result, expected, atol=1e-4) + + @pytest.mark.parametrize("dtype", get_all_dtypes()) def test_add(self, dtype, lhs, rhs): self._test_mathematical('add', dtype, lhs, rhs) @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_arctan2(self, dtype, lhs, rhs): self._test_mathematical('arctan2', dtype, lhs, rhs) @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_copysign(self, dtype, lhs, rhs): self._test_mathematical('copysign', dtype, lhs, rhs) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize("dtype", get_all_dtypes()) def test_divide(self, dtype, lhs, rhs): 
self._test_mathematical('divide', dtype, lhs, rhs) @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_fmod(self, dtype, lhs, rhs): self._test_mathematical('fmod', dtype, lhs, rhs) @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_floor_divide(self, dtype, lhs, rhs): self._test_mathematical('floor_divide', dtype, lhs, rhs) @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_hypot(self, dtype, lhs, rhs): self._test_mathematical('hypot', dtype, lhs, rhs) @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_maximum(self, dtype, lhs, rhs): self._test_mathematical('maximum', dtype, lhs, rhs) @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_minimum(self, dtype, lhs, rhs): self._test_mathematical('minimum', dtype, lhs, rhs) + @pytest.mark.parametrize("dtype", get_all_dtypes()) def test_multiply(self, dtype, lhs, rhs): self._test_mathematical('multiply', dtype, lhs, rhs) @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_remainder(self, dtype, lhs, rhs): self._test_mathematical('remainder', dtype, lhs, rhs) @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_power(self, dtype, lhs, rhs): self._test_mathematical('power', dtype, lhs, rhs) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize("dtype", get_all_dtypes()) def test_subtract(self, dtype, lhs, rhs): self._test_mathematical('subtract', dtype, 
lhs, rhs) +@pytest.mark.usefixtures("suppress_divide_invalid_numpy_warnings") @pytest.mark.parametrize("val_type", [bool, int, float], ids=['bool', 'int', 'float']) -@pytest.mark.parametrize("data_type", - [numpy.bool_, numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['numpy.bool_', 'numpy.float64', 'numpy.float32', 'numpy.int64', 'numpy.int32']) +@pytest.mark.parametrize("data_type", get_all_dtypes()) +@pytest.mark.parametrize("func", + ['add', 'multiply', 'subtract', 'divide']) @pytest.mark.parametrize("val", [0, 1, 5], ids=['0', '1', '5']) @@ -172,33 +201,78 @@ def test_subtract(self, dtype, lhs, rhs): '[[1, 2], [3, 4]]', '[[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]]', '[[[[1, 2], [3, 4]], [[1, 2], [2, 1]]], [[[1, 3], [3, 1]], [[0, 1], [1, 3]]]]']) -def test_multiply_scalar(array, val, data_type, val_type): +def test_op_with_scalar(array, val, func, data_type, val_type): np_a = numpy.array(array, dtype=data_type) dpnp_a = dpnp.array(array, dtype=data_type) val_ = val_type(val) - result = dpnp.multiply(dpnp_a, val_) - expected = numpy.multiply(np_a, val_) - numpy.testing.assert_array_equal(result, expected) + if func == 'subtract' and val_type == bool and data_type == dpnp.bool: + with pytest.raises(TypeError): + result = getattr(dpnp, func)(dpnp_a, val_) + expected = getattr(numpy, func)(np_a, val_) + + result = getattr(dpnp, func)(val_, dpnp_a) + expected = getattr(numpy, func)(val_, np_a) + else: + result = getattr(dpnp, func)(dpnp_a, val_) + expected = getattr(numpy, func)(np_a, val_) + assert_allclose(result, expected) + + result = getattr(dpnp, func)(val_, dpnp_a) + expected = getattr(numpy, func)(val_, np_a) + assert_allclose(result, expected) + + +@pytest.mark.parametrize("shape", + [(), (3, 2)], + ids=['()', '(3, 2)']) +@pytest.mark.parametrize("dtype", get_all_dtypes()) +def test_multiply_scalar(shape, dtype): + np_a = numpy.ones(shape, dtype=dtype) + dpnp_a = dpnp.ones(shape, dtype=dtype) + + result = 0.5 * dpnp_a * 1.7 + 
expected = 0.5 * np_a * 1.7 + assert_allclose(result, expected) + - result = dpnp.multiply(val_, dpnp_a) - expected = numpy.multiply(val_, np_a) - numpy.testing.assert_array_equal(result, expected) +@pytest.mark.parametrize("shape", + [(), (3, 2)], + ids=['()', '(3, 2)']) +@pytest.mark.parametrize("dtype", get_all_dtypes()) +def test_add_scalar(shape, dtype): + np_a = numpy.ones(shape, dtype=dtype) + dpnp_a = dpnp.ones(shape, dtype=dtype) + + result = 0.5 + dpnp_a + 1.7 + expected = 0.5 + np_a + 1.7 + assert_allclose(result, expected) @pytest.mark.parametrize("shape", [(), (3, 2)], ids=['()', '(3, 2)']) -@pytest.mark.parametrize("dtype", - [numpy.float32, numpy.float64], - ids=['numpy.float32', 'numpy.float64']) -def test_multiply_scalar2(shape, dtype): +@pytest.mark.parametrize("dtype", get_all_dtypes()) +def test_subtract_scalar(shape, dtype): np_a = numpy.ones(shape, dtype=dtype) dpnp_a = dpnp.ones(shape, dtype=dtype) - result = 0.5 * dpnp_a - expected = 0.5 * np_a - numpy.testing.assert_array_equal(result, expected) + result = 0.5 - dpnp_a - 1.7 + expected = 0.5 - np_a - 1.7 + assert_allclose(result, expected) + + +@pytest.mark.parametrize("shape", + [(), (3, 2)], + ids=['()', '(3, 2)']) +@pytest.mark.parametrize("dtype", get_all_dtypes()) +def test_divide_scalar(shape, dtype): + np_a = numpy.ones(shape, dtype=dtype) + dpnp_a = dpnp.ones(shape, dtype=dtype) + + result = 0.5 / dpnp_a / 1.7 + expected = 0.5 / np_a / 1.7 + assert_allclose(result, expected) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -211,7 +285,7 @@ def test_nancumprod(array): result = dpnp.nancumprod(dpnp_a) expected = numpy.nancumprod(np_a) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -224,31 +298,25 @@ def test_nancumsum(array): result = dpnp.nancumsum(dpnp_a) expected = numpy.nancumsum(np_a) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, 
result) @pytest.mark.parametrize("data", [[[1., -1.], [0.1, -0.1]], [-2, -1, 0, 1, 2]], ids=['[[1., -1.], [0.1, -0.1]]', '[-2, -1, 0, 1, 2]']) -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['numpy.float64', 'numpy.float32', 'numpy.int64', 'numpy.int32']) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_negative(data, dtype): np_a = numpy.array(data, dtype=dtype) dpnp_a = dpnp.array(data, dtype=dtype) result = dpnp.negative(dpnp_a) expected = numpy.negative(np_a) - numpy.testing.assert_array_equal(result, expected) + assert_array_equal(result, expected) @pytest.mark.usefixtures("allow_fall_back_on_numpy") -@pytest.mark.parametrize("val_type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['numpy.float64', 'numpy.float32', 'numpy.int64', 'numpy.int32']) -@pytest.mark.parametrize("data_type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['numpy.float64', 'numpy.float32', 'numpy.int64', 'numpy.int32']) +@pytest.mark.parametrize("val_type", get_all_dtypes(no_bool=True, no_complex=True, no_none=True)) +@pytest.mark.parametrize("data_type", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("val", [0, 1, 5], ids=['0', '1', '5']) @@ -269,12 +337,11 @@ def test_power(array, val, data_type, val_type): val_ = val_type(val) result = dpnp.power(dpnp_a, val_) expected = numpy.power(np_a, val_) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) class TestEdiff1d: - @pytest.mark.parametrize("data_type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32]) + @pytest.mark.parametrize("data_type", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("array", [[1, 2, 4, 7, 0], [], [1], @@ -285,7 +352,7 @@ def test_ediff1d_int(self, array, data_type): result = dpnp.ediff1d(dpnp_a) expected = numpy.ediff1d(np_a) - numpy.testing.assert_array_equal(expected, result) 
+ assert_array_equal(expected, result) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -297,13 +364,12 @@ def test_ediff1d_args(self): result = dpnp.ediff1d(np_a, to_end=to_end, to_begin=to_begin) expected = numpy.ediff1d(np_a, to_end=to_end, to_begin=to_begin) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.usefixtures("allow_fall_back_on_numpy") class TestTrapz: - @pytest.mark.parametrize("data_type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32]) + @pytest.mark.parametrize("data_type", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("array", [[1, 2, 3], [[1, 2, 3], [4, 5, 6]], [1, 4, 6, 9, 10, 12], @@ -315,12 +381,10 @@ def test_trapz_default(self, array, data_type): result = dpnp.trapz(dpnp_a) expected = numpy.trapz(np_a) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) - @pytest.mark.parametrize("data_type_y", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32]) - @pytest.mark.parametrize("data_type_x", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32]) + @pytest.mark.parametrize("data_type_y", get_all_dtypes(no_bool=True, no_complex=True)) + @pytest.mark.parametrize("data_type_x", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("y_array", [[1, 2, 4, 5], [1., 2.5, 6., 7.]]) @pytest.mark.parametrize("x_array", [[2, 5, 6, 9]]) @@ -333,7 +397,7 @@ def test_trapz_with_x_params(self, y_array, x_array, data_type_y, data_type_x): result = dpnp.trapz(dpnp_y, dpnp_x) expected = numpy.trapz(np_y, np_x) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("array", [[1, 2, 3], [4, 5, 6]]) def test_trapz_with_x_param_2ndim(self, array): @@ -342,7 +406,7 @@ def test_trapz_with_x_param_2ndim(self, array): result = dpnp.trapz(dpnp_a, dpnp_a) expected = numpy.trapz(np_a, np_a) - numpy.testing.assert_array_equal(expected, result) + 
assert_array_equal(expected, result) @pytest.mark.parametrize("y_array", [[1, 2, 4, 5], [1., 2.5, 6., 7., ]]) @@ -353,7 +417,7 @@ def test_trapz_with_dx_params(self, y_array, dx): result = dpnp.trapz(dpnp_y, dx=dx) expected = numpy.trapz(np_y, dx=dx) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -388,10 +452,9 @@ def test_cross_3x3(self, x1, x2, axisa, axisb, axisc, axis): result = dpnp.cross(dpnp_x1, dpnp_x2, axisa, axisb, axisc, axis) expected = numpy.cross(np_x1, np_x2, axisa, axisb, axisc, axis) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") class TestGradient: @pytest.mark.parametrize("array", [[2, 3, 6, 8, 4, 9], @@ -403,8 +466,9 @@ def test_gradient_y1(self, array): result = dpnp.gradient(dpnp_y) expected = numpy.gradient(np_y) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @pytest.mark.parametrize("array", [[2, 3, 6, 8, 4, 9], [3., 4., 7.5, 9.], [2, 6, 8, 10]]) @@ -415,7 +479,7 @@ def test_gradient_y1_dx(self, array, dx): result = dpnp.gradient(dpnp_y, dx) expected = numpy.gradient(np_y, dx) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) class TestCeil: @@ -433,7 +497,7 @@ def test_ceil(self): np_array = numpy.array(array_data, dtype=numpy.float64) expected = numpy.ceil(np_array, out=out) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("dtype", [numpy.float32, numpy.int64, numpy.int32], @@ -473,7 +537,7 @@ def test_floor(self): np_array = numpy.array(array_data, dtype=numpy.float64) expected = numpy.floor(np_array, out=out) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("dtype", 
[numpy.float32, numpy.int64, numpy.int32], @@ -513,7 +577,7 @@ def test_trunc(self): np_array = numpy.array(array_data, dtype=numpy.float64) expected = numpy.trunc(np_array, out=out) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("dtype", [numpy.float32, numpy.int64, numpy.int32], @@ -556,7 +620,7 @@ def test_power(self): np_array2 = numpy.array(array2_data, dtype=numpy.float64) expected = numpy.power(np_array1, np_array2, out=out) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("dtype", [numpy.float32, numpy.int64, numpy.int32], diff --git a/tests/test_random.py b/tests/test_random.py index 54cb2fa3a4d7..bc3501f4d20b 100644 --- a/tests/test_random.py +++ b/tests/test_random.py @@ -75,7 +75,6 @@ def test_input_shape(func): assert shape == res.shape -@pytest.mark.usefixtures("allow_fall_back_on_numpy") @pytest.mark.parametrize("func", [dpnp.random.random, dpnp.random.random_sample, diff --git a/tests/test_random_state.py b/tests/test_random_state.py index b93f52411c5d..0d1752c744ee 100644 --- a/tests/test_random_state.py +++ b/tests/test_random_state.py @@ -34,8 +34,8 @@ def get_default_floating(): class TestNormal: @pytest.mark.parametrize("dtype", - [dpnp.float32, dpnp.float64, None], - ids=['float32', 'float64', 'None']) + [dpnp.float32, dpnp.float64, dpnp.float, None], + ids=['float32', 'float64', 'float', 'None']) @pytest.mark.parametrize("usm_type", ["host", "device", "shared"], ids=['host', 'device', 'shared']) @@ -173,9 +173,9 @@ def test_fallback(self, loc, scale): @pytest.mark.parametrize("dtype", - [dpnp.float16, dpnp.float, float, dpnp.integer, dpnp.int64, dpnp.int32, dpnp.int, int, + [dpnp.float16, float, dpnp.integer, dpnp.int64, dpnp.int32, dpnp.int, int, dpnp.longcomplex, dpnp.complex128, dpnp.complex64, dpnp.bool, dpnp.bool_], - ids=['dpnp.float16', 'dpnp.float', 'float', 'dpnp.integer', 'dpnp.int64', 
'dpnp.int32', 'dpnp.int', 'int', + ids=['dpnp.float16', 'float', 'dpnp.integer', 'dpnp.int64', 'dpnp.int32', 'dpnp.int', 'int', 'dpnp.longcomplex', 'dpnp.complex128', 'dpnp.complex64', 'dpnp.bool', 'dpnp.bool_']) def test_invalid_dtype(self, dtype): # dtype must be float32 or float64 @@ -257,8 +257,8 @@ def test_wrong_dims(self): class TestRandInt: @pytest.mark.parametrize("dtype", - [int, dpnp.int32, dpnp.int], - ids=['int', 'dpnp.int32', 'dpnp.int']) + [int, dpnp.int32, dpnp.int, dpnp.integer], + ids=['int', 'dpnp.int32', 'dpnp.int', 'dpnp.integer']) @pytest.mark.parametrize("usm_type", ["host", "device", "shared"], ids=['host', 'device', 'shared']) @@ -267,6 +267,9 @@ def test_distr(self, dtype, usm_type): low = 1 high = 10 + if dtype in (dpnp.int, dpnp.integer) and dtype != dpnp.dtype('int32'): + pytest.skip("dtype isn't alias on dpnp.int32 on the target OS, so there will be a fallback") + sycl_queue = dpctl.SyclQueue() data = RandomState(seed, sycl_queue=sycl_queue).randint(low=low, high=high, @@ -319,7 +322,6 @@ def test_negative_bounds(self): assert_array_equal(actual, desired) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_negative_interval(self): rs = RandomState(3567) @@ -421,16 +423,16 @@ def test_bounds_fallback(self, low, high): @pytest.mark.usefixtures("allow_fall_back_on_numpy") @pytest.mark.parametrize("dtype", - [dpnp.int64, dpnp.integer, dpnp.bool, dpnp.bool_, bool], - ids=['dpnp.int64', 'dpnp.integer', 'dpnp.bool', 'dpnp.bool_', 'bool']) + [dpnp.int64, dpnp.int, dpnp.integer, dpnp.bool, dpnp.bool_, bool], + ids=['dpnp.int64', 'dpnp.int', 'dpnp.integer', 'dpnp.bool', 'dpnp.bool_', 'bool']) def test_dtype_fallback(self, dtype): seed = 157 low = -3 if not dtype in {dpnp.bool_, bool} else 0 high = 37 if not dtype in {dpnp.bool_, bool} else 2 size = (3, 2, 5) - if dtype == dpnp.integer and dtype == dpnp.dtype('int32'): - pytest.skip("dpnp.integer is alias on dpnp.int32 on the target OS, so no fallback here") + if dtype in (dpnp.int, 
dpnp.integer) and dtype == dpnp.dtype('int32'): + pytest.skip("dtype is alias on dpnp.int32 on the target OS, so no fallback here") # dtype must be int or dpnp.int32, in other cases it will be a fallback to numpy actual = RandomState(seed).randint(low=low, high=high, size=size, dtype=dtype).asnumpy() @@ -714,8 +716,8 @@ class TestUniform: [[1.23, 10.54], [10.54, 1.23]], ids=['(low, high)=[1.23, 10.54]', '(low, high)=[10.54, 1.23]']) @pytest.mark.parametrize("dtype", - [dpnp.float32, dpnp.float64, dpnp.int32, None], - ids=['float32', 'float64', 'int32', 'None']) + [dpnp.float32, dpnp.float64, dpnp.float, dpnp.int32, None], + ids=['float32', 'float64', 'float', 'int32', 'None']) @pytest.mark.parametrize("usm_type", ["host", "device", "shared"], ids=['host', 'device', 'shared']) @@ -831,12 +833,15 @@ def test_fallback(self, low, high): @pytest.mark.parametrize("dtype", - [dpnp.float16, dpnp.float, float, dpnp.integer, dpnp.int64, dpnp.int, int, + [dpnp.float16, float, dpnp.integer, dpnp.int64, dpnp.int, int, dpnp.longcomplex, dpnp.complex128, dpnp.complex64, dpnp.bool, dpnp.bool_], - ids=['dpnp.float16', 'dpnp.float', 'float', 'dpnp.integer', 'dpnp.int64', 'dpnp.int', 'int', + ids=['dpnp.float16', 'float', 'dpnp.integer', 'dpnp.int64', 'dpnp.int', 'int', 'dpnp.longcomplex', 'dpnp.complex128', 'dpnp.complex64', 'dpnp.bool', 'dpnp.bool_']) def test_invalid_dtype(self, dtype): - # dtype must be float32 or float64 + if dtype in (dpnp.int, dpnp.integer) and dtype == dpnp.dtype('int32'): + pytest.skip("dtype is alias on dpnp.int32 on the target OS, so no error here") + + # dtype must be int32, float32 or float64 assert_raises(TypeError, RandomState().uniform, dtype=dtype) diff --git a/tests/test_strides.py b/tests/test_strides.py index 7ec1d6b3f03f..02e8c8689757 100644 --- a/tests/test_strides.py +++ b/tests/test_strides.py @@ -1,8 +1,13 @@ import math import pytest +from .helper import get_all_dtypes import dpnp + import numpy +from numpy.testing import ( + assert_allclose 
+) def _getattr(ex, str_): @@ -15,12 +20,10 @@ def _getattr(ex, str_): @pytest.mark.parametrize("func_name", ['abs', ]) -@pytest.mark.parametrize("type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) -def test_strides(func_name, type): +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) +def test_strides(func_name, dtype): shape = (4, 4) - a = numpy.arange(shape[0] * shape[1], dtype=type).reshape(shape) + a = numpy.arange(shape[0] * shape[1], dtype=dtype).reshape(shape) a_strides = a[0::2, 0::2] dpa = dpnp.array(a) dpa_strides = dpa[0::2, 0::2] @@ -31,17 +34,16 @@ def test_strides(func_name, type): numpy_func = _getattr(numpy, func_name) expected = numpy_func(a_strides) - numpy.testing.assert_allclose(expected, result) + assert_allclose(expected, result) +@pytest.mark.usefixtures("suppress_divide_invalid_numpy_warnings") @pytest.mark.parametrize("func_name", ["arccos", "arccosh", "arcsin", "arcsinh", "arctan", "arctanh", "cbrt", "ceil", "copy", "cos", "cosh", "conjugate", "degrees", "ediff1d", "exp", "exp2", "expm1", "fabs", "floor", "log", "log10", "log1p", "log2", "negative", "radians", "sign", "sin", "sinh", "sqrt", "square", "tanh", "trunc"]) -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=["float64", "float32", "int64", "int32"]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("shape", [(10,)], ids=["(10,)"]) @@ -58,12 +60,10 @@ def test_strides_1arg(func_name, dtype, shape): numpy_func = _getattr(numpy, func_name) expected = numpy_func(b) - numpy.testing.assert_allclose(result, expected) + assert_allclose(result, expected) -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=["float64", "float32", "int64", "int32"]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) 
@pytest.mark.parametrize("shape", [(10,)], ids=["(10,)"]) @@ -80,12 +80,10 @@ def test_strides_erf(dtype, shape): for idx, val in enumerate(b): expected[idx] = math.erf(val) - numpy.testing.assert_allclose(result, expected) + assert_allclose(result, expected) -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=["float64", "float32", "int64", "int32"]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("shape", [(10,)], ids=["(10,)"]) @@ -101,12 +99,10 @@ def test_strides_reciprocal(dtype, shape): result = dpnp.reciprocal(dpb) expected = numpy.reciprocal(b) - numpy.testing.assert_allclose(result, expected, rtol=1e-06) + assert_allclose(result, expected, rtol=1e-06) -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=["float64", "float32", "int64", "int32"]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("shape", [(10,)], ids=["(10,)"]) @@ -120,14 +116,12 @@ def test_strides_tan(dtype, shape): result = dpnp.tan(dpb) expected = numpy.tan(b) - numpy.testing.assert_allclose(result, expected, rtol=1e-06) + assert_allclose(result, expected, rtol=1e-06) @pytest.mark.parametrize("func_name", ["add", "arctan2", "hypot", "maximum", "minimum", "multiply", "power", "subtract"]) -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=["float64", "float32", "int64", "int32"]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("shape", [(3, 3)], ids=["(3, 3)"]) @@ -144,7 +138,7 @@ def test_strides_2args(func_name, dtype, shape): numpy_func = _getattr(numpy, func_name) expected = numpy_func(a, b) - numpy.testing.assert_allclose(result, expected) + assert_allclose(result, expected) @pytest.mark.parametrize("func_name", @@ -168,12 +162,10 @@ def 
test_strides_bitwise(func_name, dtype, shape): numpy_func = _getattr(numpy, func_name) expected = numpy_func(a, b) - numpy.testing.assert_allclose(result, expected) + assert_allclose(result, expected) -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=["float64", "float32", "int64", "int32"]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("shape", [(3, 3)], ids=["(3, 3)"]) @@ -187,13 +179,10 @@ def test_strides_copysign(dtype, shape): result = dpnp.copysign(dpa, dpb) expected = numpy.copysign(a, b) - numpy.testing.assert_allclose(result, expected) + assert_allclose(result, expected) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=["float64", "float32", "int64", "int32"]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("shape", [(3, 3)], ids=["(3, 3)"]) @@ -207,13 +196,10 @@ def test_strides_fmod(dtype, shape): result = dpnp.fmod(dpa, dpb) expected = numpy.fmod(a, b) - numpy.testing.assert_allclose(result, expected) + assert_allclose(result, expected) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=["float64", "float32", "int64", "int32"]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("shape", [(3, 3)], ids=["(3, 3)"]) @@ -227,4 +213,4 @@ def test_strides_true_devide(dtype, shape): result = dpnp.fmod(dpa, dpb) expected = numpy.fmod(a, b) - numpy.testing.assert_allclose(result, expected) + assert_allclose(result, expected) diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index 26a71eef2936..1bffa18111b8 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -1,9 +1,15 @@ import pytest +from .helper 
import get_all_dtypes import dpnp import dpctl import numpy +from numpy.testing import ( + assert_array_equal, + assert_raises +) + list_of_backend_str = [ "host", @@ -17,7 +23,7 @@ "cpu", ] -available_devices = [d for d in dpctl.get_devices() if not d.has_aspect_host] +available_devices = [d for d in dpctl.get_devices() if not getattr(d, 'has_aspect_host', False)] valid_devices = [] for device in available_devices: @@ -76,27 +82,30 @@ def vvsort(val, vec, size, xp): "func, arg, kwargs", [ pytest.param("arange", - -25.7, + [-25.7], {'stop': 10**8, 'step': 15}), pytest.param("full", - (2,2), + [(2,2)], {'fill_value': 5}), + pytest.param("eye", + [4, 2], + {}), pytest.param("ones", - (2,2), + [(2,2)], {}), pytest.param("zeros", - (2,2), + [(2,2)], {}) ]) @pytest.mark.parametrize("device", valid_devices, ids=[device.filter_string for device in valid_devices]) def test_array_creation(func, arg, kwargs, device): - numpy_array = getattr(numpy, func)(arg, **kwargs) + numpy_array = getattr(numpy, func)(*arg, **kwargs) dpnp_kwargs = dict(kwargs) dpnp_kwargs['device'] = device - dpnp_array = getattr(dpnp, func)(arg, **dpnp_kwargs) + dpnp_array = getattr(dpnp, func)(*arg, **dpnp_kwargs) numpy.testing.assert_array_equal(numpy_array, dpnp_array) assert dpnp_array.sycl_device == device @@ -152,12 +161,22 @@ def test_array_creation_like(func, kwargs, device_x, device_y): dpnp_kwargs = dict(kwargs) dpnp_kwargs['device'] = device_y - + y = getattr(dpnp, func)(x, **dpnp_kwargs) numpy.testing.assert_array_equal(y_orig, y) assert_sycl_queue_equal(y.sycl_queue, x.to_device(device_y).sycl_queue) +@pytest.mark.parametrize("func", ["tril", "triu"], ids=["tril", "triu"]) +@pytest.mark.parametrize("device", + valid_devices, + ids=[device.filter_string for device in valid_devices]) +def test_tril_triu(func, device): + x0 = dpnp.ones((3,3), device=device) + x = getattr(dpnp, func)(x0) + assert_sycl_queue_equal(x.sycl_queue, x0.sycl_queue) + + 
@pytest.mark.usefixtures("allow_fall_back_on_numpy") @pytest.mark.parametrize( "func,data", @@ -335,36 +354,114 @@ def test_broadcasting(func, data1, data2, device): assert_sycl_queue_equal(result_queue, expected_queue) +@pytest.mark.parametrize("func", ["add", "copysign", "divide", "floor_divide", "fmod", + "maximum", "minimum", "multiply", "outer", "power", + "remainder", "subtract"]) +@pytest.mark.parametrize("device", + valid_devices, + ids=[device.filter_string for device in valid_devices]) +def test_2in_1out_diff_queue_but_equal_context(func, device): + x1 = dpnp.arange(10) + x2 = dpnp.arange(10, sycl_queue=dpctl.SyclQueue(device))[::-1] + with assert_raises(ValueError): + getattr(dpnp, func)(x1, x2) + + +@pytest.mark.parametrize( + "func, kwargs", + [ + pytest.param("normal", + {'loc': 1.0, 'scale': 3.4, 'size': (5, 12)}), + pytest.param("rand", + {'d0': 20}), + pytest.param("randint", + {'low': 2, 'high': 15, 'size': (4, 8, 16), 'dtype': dpnp.int32}), + pytest.param("randn", + {'d0': 20}), + pytest.param("random", + {'size': (35, 45)}), + pytest.param("random_integers", + {'low': -17, 'high': 3, 'size': (12, 16)}), + pytest.param("random_sample", + {'size': (7, 7)}), + pytest.param("ranf", + {'size': (10, 7, 12)}), + pytest.param("sample", + {'size': (7, 9)}), + pytest.param("standard_normal", + {'size': (4, 4, 8)}), + pytest.param("uniform", + {'low': 1.0, 'high': 2.0, 'size': (4, 2, 5)}) + ]) +@pytest.mark.parametrize("device", + valid_devices, + ids=[device.filter_string for device in valid_devices]) @pytest.mark.parametrize("usm_type", ["host", "device", "shared"]) -@pytest.mark.parametrize("size", - [None, (), 3, (2, 1), (4, 2, 5)], - ids=['None', '()', '3', '(2,1)', '(4,2,5)']) -def test_uniform(usm_type, size): - low = 1.0 - high = 2.0 - res = dpnp.random.uniform(low, high, size=size, usm_type=usm_type) +def test_random(func, kwargs, device, usm_type): + kwargs = {**kwargs, 'device': device, 'usm_type': usm_type} + + # test with default SYCL queue 
per a device + res_array = getattr(dpnp.random, func)(**kwargs) + assert device == res_array.sycl_device + assert usm_type == res_array.usm_type - assert usm_type == res.usm_type + sycl_queue = dpctl.SyclQueue(device, property="in_order") + kwargs['device'] = None + kwargs['sycl_queue'] = sycl_queue + # test with in-order SYCL queue per a device and passed as argument + res_array = getattr(dpnp.random, func)(**kwargs) + assert usm_type == res_array.usm_type + assert_sycl_queue_equal(res_array.sycl_queue, sycl_queue) + +@pytest.mark.parametrize( + "func, args, kwargs", + [ + pytest.param("normal", + [], + {'loc': 1.0, 'scale': 3.4, 'size': (5, 12)}), + pytest.param("rand", + [15, 30, 5], + {}), + pytest.param("randint", + [], + {'low': 2, 'high': 15, 'size': (4, 8, 16), 'dtype': dpnp.int32}), + pytest.param("randn", + [20, 5, 40], + {}), + pytest.param("random_sample", + [], + {'size': (7, 7)}), + pytest.param("standard_normal", + [], + {'size': (4, 4, 8)}), + pytest.param("uniform", + [], + {'low': 1.0, 'high': 2.0, 'size': (4, 2, 5)}) + ]) +@pytest.mark.parametrize("device", + valid_devices, + ids=[device.filter_string for device in valid_devices]) @pytest.mark.parametrize("usm_type", ["host", "device", "shared"]) -@pytest.mark.parametrize("seed", - [None, (), 123, (12, 58), (147, 56, 896), [1, 654, 78]], - ids=['None', '()', '123', '(12,58)', '(147,56,896)', '[1,654,78]']) -def test_rs_uniform(usm_type, seed): - seed = 123 - sycl_queue = dpctl.SyclQueue() - low = 1.0 - high = 2.0 - rs = dpnp.random.RandomState(seed, sycl_queue=sycl_queue) - res = rs.uniform(low, high, usm_type=usm_type) +def test_random_state(func, args, kwargs, device, usm_type): + kwargs = {**kwargs, 'usm_type': usm_type} - assert usm_type == res.usm_type + # test with default SYCL queue per a device + rs = dpnp.random.RandomState(seed=1234567, device=device) + res_array = getattr(rs, func)(*args, **kwargs) + assert device == res_array.sycl_device + assert usm_type == res_array.usm_type - 
res_sycl_queue = res.get_array().sycl_queue - assert_sycl_queue_equal(res_sycl_queue, sycl_queue) + sycl_queue = dpctl.SyclQueue(device, property="in_order") + + # test with in-order SYCL queue per a device and passed as argument + rs = dpnp.random.RandomState((147, 56, 896), sycl_queue=sycl_queue) + res_array = getattr(rs, func)(*args, **kwargs) + assert usm_type == res_array.usm_type + assert_sycl_queue_equal(res_array.sycl_queue, sycl_queue) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -569,7 +666,7 @@ def test_eig(device): dpnp_val_queue = dpnp_val.get_array().sycl_queue dpnp_vec_queue = dpnp_vec.get_array().sycl_queue - # compare queue and device + # compare queue and device assert_sycl_queue_equal(dpnp_val_queue, expected_queue) assert_sycl_queue_equal(dpnp_vec_queue, expected_queue) @@ -655,7 +752,6 @@ def test_qr(device): assert_sycl_queue_equal(dpnp_r_queue, expected_queue) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") @pytest.mark.parametrize("device", valid_devices, ids=[device.filter_string for device in valid_devices]) @@ -663,7 +759,7 @@ def test_svd(device): tol = 1e-12 shape = (2,2) numpy_data = numpy.arange(shape[0] * shape[1]).reshape(shape) - dpnp_data = dpnp.arange(shape[0] * shape[1]).reshape(shape) + dpnp_data = dpnp.arange(shape[0] * shape[1], device=device).reshape(shape) np_u, np_s, np_vt = numpy.linalg.svd(numpy_data) dpnp_u, dpnp_s, dpnp_vt = dpnp.linalg.svd(dpnp_data) @@ -675,7 +771,7 @@ def test_svd(device): assert (dpnp_vt.shape == np_vt.shape) # check decomposition - dpnp_diag_s = dpnp.zeros(shape, dtype=dpnp_s.dtype) + dpnp_diag_s = dpnp.zeros(shape, dtype=dpnp_s.dtype, device=device) for i in range(dpnp_s.size): dpnp_diag_s[i, i] = dpnp_s[i] @@ -739,3 +835,39 @@ def test_array_copy(device, func, device_param, queue_param): result = dpnp.array(dpnp_data, **kwargs) assert_sycl_queue_equal(result.sycl_queue, dpnp_data.sycl_queue) + + +@pytest.mark.parametrize("device", + valid_devices, + ids=[device.filter_string for 
device in valid_devices]) +#TODO need to delete no_bool=True when use dlpack > 0.7 version +@pytest.mark.parametrize("arr_dtype", get_all_dtypes(no_float16=True, no_bool=True)) +@pytest.mark.parametrize("shape", [tuple(), (2,), (3, 0, 1), (2, 2, 2)]) +def test_from_dlpack(arr_dtype, shape, device): + X = dpnp.empty(shape=shape, dtype=arr_dtype, device=device) + Y = dpnp.from_dlpack(X) + assert_array_equal(X, Y) + assert X.__dlpack_device__() == Y.__dlpack_device__() + assert X.sycl_device == Y.sycl_device + assert X.sycl_context == Y.sycl_context + assert X.usm_type == Y.usm_type + if Y.ndim: + V = Y[::-1] + W = dpnp.from_dlpack(V) + assert V.strides == W.strides + + +@pytest.mark.parametrize("device", + valid_devices, + ids=[device.filter_string for device in valid_devices]) +#TODO need to delete no_bool=True when use dlpack > 0.7 version +@pytest.mark.parametrize("arr_dtype", get_all_dtypes(no_float16=True, no_bool=True)) +def test_from_dlpack_with_dpt(arr_dtype, device): + X = dpctl.tensor.empty((64,), dtype=arr_dtype, device=device) + Y = dpnp.from_dlpack(X) + assert_array_equal(X, Y) + assert isinstance(Y, dpnp.dpnp_array.dpnp_array) + assert X.__dlpack_device__() == Y.__dlpack_device__() + assert X.sycl_device == Y.sycl_device + assert X.sycl_context == Y.sycl_context + assert X.usm_type == Y.usm_type diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py index 094fe419c263..1a33a1d655dd 100644 --- a/tests/test_usm_type.py +++ b/tests/test_usm_type.py @@ -11,16 +11,17 @@ ] -@pytest.mark.parametrize("usm_type", list_of_usm_types, ids=list_of_usm_types) -def test_coerced_usm_types_sum(usm_type): - x = dp.arange(10, usm_type = "device") - y = dp.arange(10, usm_type = usm_type) +@pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types) +@pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types) +def test_coerced_usm_types_sum(usm_type_x, usm_type_y): + x = dp.arange(1000, usm_type = usm_type_x) + y = 
dp.arange(1000, usm_type = usm_type_y) - z = x + y - - assert z.usm_type == x.usm_type - assert z.usm_type == "device" - assert y.usm_type == usm_type + z = 1.3 + x + y + 2 + + assert x.usm_type == usm_type_x + assert y.usm_type == usm_type_y + assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y]) @pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types) @@ -29,8 +30,34 @@ def test_coerced_usm_types_mul(usm_type_x, usm_type_y): x = dp.arange(10, usm_type = usm_type_x) y = dp.arange(10, usm_type = usm_type_y) - z = x * y - + z = 3 * x * y * 1.5 + + assert x.usm_type == usm_type_x + assert y.usm_type == usm_type_y + assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y]) + + +@pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types) +@pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types) +def test_coerced_usm_types_subtract(usm_type_x, usm_type_y): + x = dp.arange(50, usm_type = usm_type_x) + y = dp.arange(50, usm_type = usm_type_y) + + z = 20 - x - y - 7.4 + + assert x.usm_type == usm_type_x + assert y.usm_type == usm_type_y + assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y]) + + +@pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types) +@pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types) +def test_coerced_usm_types_divide(usm_type_x, usm_type_y): + x = dp.arange(120, usm_type = usm_type_x) + y = dp.arange(120, usm_type = usm_type_y) + + z = 2 / x / y / 1.5 + assert x.usm_type == usm_type_x assert y.usm_type == usm_type_y assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y]) @@ -61,3 +88,32 @@ def test_array_creation(func, args, usm_type_x, usm_type_y): assert x.usm_type == usm_type_x assert y.usm_type == usm_type_y + + +@pytest.mark.skip() +@pytest.mark.parametrize("func", ["tril", "triu"], ids=["tril", "triu"]) +@pytest.mark.parametrize("usm_type", list_of_usm_types, 
ids=list_of_usm_types) +def test_tril_triu(func, usm_type): + x0 = dp.ones((3,3), usm_type=usm_type) + x = getattr(dp, func)(x0) + assert x.usm_type == usm_type + + +@pytest.mark.parametrize("op", + ['equal', 'greater', 'greater_equal', 'less', 'less_equal', + 'logical_and', 'logical_or', 'logical_xor', 'not_equal'], + ids=['equal', 'greater', 'greater_equal', 'less', 'less_equal', + 'logical_and', 'logical_or', 'logical_xor', 'not_equal']) +@pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types) +@pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types) +def test_coerced_usm_types_logic_op(op, usm_type_x, usm_type_y): + x = dp.arange(100, usm_type = usm_type_x) + y = dp.arange(100, usm_type = usm_type_y)[::-1] + + z = getattr(dp, op)(x, y) + zx = getattr(dp, op)(x, 50) + zy = getattr(dp, op)(30, y) + + assert x.usm_type == zx.usm_type == usm_type_x + assert y.usm_type == zy.usm_type == usm_type_y + assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y]) diff --git a/tests/third_party/cupy/creation_tests/test_basic.py b/tests/third_party/cupy/creation_tests/test_basic.py index 337718d3caf3..1adcf98f969b 100644 --- a/tests/third_party/cupy/creation_tests/test_basic.py +++ b/tests/third_party/cupy/creation_tests/test_basic.py @@ -164,7 +164,7 @@ def test_empty_zero_sized_array_strides(self, order): @testing.for_all_dtypes() @testing.numpy_cupy_array_equal() def test_eye(self, xp, dtype): - return xp.eye(5, 4, 1, dtype) + return xp.eye(5, 4, k=1, dtype=dtype) @testing.for_all_dtypes() @testing.numpy_cupy_array_equal() diff --git a/tests/third_party/cupy/creation_tests/test_from_data.py b/tests/third_party/cupy/creation_tests/test_from_data.py index e07d927b1cf0..ce71ef311a56 100644 --- a/tests/third_party/cupy/creation_tests/test_from_data.py +++ b/tests/third_party/cupy/creation_tests/test_from_data.py @@ -454,6 +454,7 @@ def test_asfortranarray_cuda_array_zero_dim_dtype( a = xp.ones((), dtype=dtype_a) 
return xp.asfortranarray(a, dtype=dtype_b) + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.numpy_cupy_array_equal() def test_fromfile(self, xp): with tempfile.TemporaryFile() as fh: diff --git a/tests/third_party/cupy/creation_tests/test_matrix.py b/tests/third_party/cupy/creation_tests/test_matrix.py index a5471f213ebf..fe144cbc58c4 100644 --- a/tests/third_party/cupy/creation_tests/test_matrix.py +++ b/tests/third_party/cupy/creation_tests/test_matrix.py @@ -140,6 +140,7 @@ def test_tri_posi(self, xp, dtype): {'shape': (2, 3, 4)}, ) @testing.gpu +@pytest.mark.usefixtures("allow_fall_back_on_numpy") class TestTriLowerAndUpper(unittest.TestCase): @testing.for_all_dtypes(no_complex=True) @@ -148,7 +149,6 @@ def test_tril(self, xp, dtype): m = testing.shaped_arange(self.shape, xp, dtype) return xp.tril(m) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.numpy_cupy_array_equal() def test_tril_array_like(self, xp): return xp.tril([[1, 2], [3, 4]]) @@ -157,13 +157,13 @@ def test_tril_array_like(self, xp): @testing.numpy_cupy_array_equal() def test_tril_nega(self, xp, dtype): m = testing.shaped_arange(self.shape, xp, dtype) - return xp.tril(m, -1) + return xp.tril(m, k=-1) @testing.for_all_dtypes(no_complex=True) @testing.numpy_cupy_array_equal() def test_tril_posi(self, xp, dtype): m = testing.shaped_arange(self.shape, xp, dtype) - return xp.tril(m, 1) + return xp.tril(m, k=1) @testing.for_all_dtypes(no_complex=True) @testing.numpy_cupy_array_equal() @@ -171,7 +171,6 @@ def test_triu(self, xp, dtype): m = testing.shaped_arange(self.shape, xp, dtype) return xp.triu(m) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.numpy_cupy_array_equal() def test_triu_array_like(self, xp): return xp.triu([[1, 2], [3, 4]]) @@ -180,10 +179,10 @@ def test_triu_array_like(self, xp): @testing.numpy_cupy_array_equal() def test_triu_nega(self, xp, dtype): m = testing.shaped_arange(self.shape, xp, dtype) - return xp.triu(m, -1) + return xp.triu(m, 
k=-1) @testing.for_all_dtypes(no_complex=True) @testing.numpy_cupy_array_equal() def test_triu_posi(self, xp, dtype): m = testing.shaped_arange(self.shape, xp, dtype) - return xp.triu(m, 1) + return xp.triu(m, k=1) diff --git a/tests/third_party/cupy/creation_tests/test_ranges.py b/tests/third_party/cupy/creation_tests/test_ranges.py index 75960e492c17..4d5bc03f81b0 100644 --- a/tests/third_party/cupy/creation_tests/test_ranges.py +++ b/tests/third_party/cupy/creation_tests/test_ranges.py @@ -54,7 +54,7 @@ def test_arange8(self, xp, dtype): def test_arange9(self): for xp in (numpy, cupy): - with pytest.raises(ValueError): + with pytest.raises((ValueError, TypeError)): xp.arange(10, dtype=xp.bool_) @testing.numpy_cupy_array_equal() diff --git a/tests/third_party/cupy/indexing_tests/test_generate.py b/tests/third_party/cupy/indexing_tests/test_generate.py index d10e503bcec8..2bb0404ab599 100644 --- a/tests/third_party/cupy/indexing_tests/test_generate.py +++ b/tests/third_party/cupy/indexing_tests/test_generate.py @@ -28,7 +28,7 @@ def test_indices_list2(self, xp, dtype): def test_indices_list3(self): for xp in (numpy, cupy): - with pytest.raises(ValueError): + with pytest.raises((ValueError, TypeError)): xp.indices((1, 2, 3, 4), dtype=xp.bool_) diff --git a/tests/third_party/cupy/indexing_tests/test_insert.py b/tests/third_party/cupy/indexing_tests/test_insert.py index ed6a156e8848..fdcc5357e19e 100644 --- a/tests/third_party/cupy/indexing_tests/test_insert.py +++ b/tests/third_party/cupy/indexing_tests/test_insert.py @@ -42,7 +42,7 @@ class TestPlaceRaises(unittest.TestCase): def test_place_empty_value_error(self, dtype): for xp in (numpy, cupy): a = testing.shaped_arange(self.shape, xp, dtype) - mask = testing.shaped_arange(self.shape, xp, numpy.int) % 2 == 0 + mask = testing.shaped_arange(self.shape, xp, numpy.int_) % 2 == 0 vals = testing.shaped_random((0,), xp, dtype) with pytest.raises(ValueError): xp.place(a, mask, vals) diff --git 
a/tests/third_party/cupy/logic_tests/test_comparison.py b/tests/third_party/cupy/logic_tests/test_comparison.py index 0be9eaeee610..67848359188d 100644 --- a/tests/third_party/cupy/logic_tests/test_comparison.py +++ b/tests/third_party/cupy/logic_tests/test_comparison.py @@ -8,7 +8,6 @@ from tests.third_party.cupy import testing -@pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.gpu class TestComparison(unittest.TestCase): @@ -38,7 +37,6 @@ def test_equal(self): self.check_binary('equal') -@pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.gpu class TestComparisonOperator(unittest.TestCase): diff --git a/tests/third_party/cupy/logic_tests/test_ops.py b/tests/third_party/cupy/logic_tests/test_ops.py index 55b8617882b1..cdbd035cd265 100644 --- a/tests/third_party/cupy/logic_tests/test_ops.py +++ b/tests/third_party/cupy/logic_tests/test_ops.py @@ -20,18 +20,14 @@ def check_binary(self, name, xp, dtype): b = testing.shaped_reverse_arange((2, 3), xp, dtype) return getattr(xp, name)(a, b) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_logical_and(self): self.check_binary('logical_and') - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_logical_or(self): self.check_binary('logical_or') - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_logical_xor(self): self.check_binary('logical_xor') - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_logical_not(self): self.check_unary('logical_not') diff --git a/tests/third_party/cupy/math_tests/test_arithmetic.py b/tests/third_party/cupy/math_tests/test_arithmetic.py index 28771b4979b5..027722d8bef2 100644 --- a/tests/third_party/cupy/math_tests/test_arithmetic.py +++ b/tests/third_party/cupy/math_tests/test_arithmetic.py @@ -1,5 +1,6 @@ import itertools import unittest +import warnings import numpy import pytest @@ -130,8 +131,8 @@ def check_binary(self, xp): func = getattr(xp, self.name) with testing.NumpyError(divide='ignore'): - with 
numpy.warnings.catch_warnings(): - numpy.warnings.filterwarnings('ignore') + with warnings.catch_warnings(): + warnings.filterwarnings('ignore') if self.use_dtype: y = func(arg1, arg2, dtype=self.dtype) else: @@ -145,17 +146,14 @@ def check_binary(self, xp): y = y.astype(numpy.complex64) # NumPy returns an output array of another type than DPNP when input ones have diffrent types. - if self.name == 'multiply' and xp is cupy: - if xp.isscalar(arg1) and xp.isscalar(arg2): - # If both are scalars, the result will be a scalar, so needs to convert into numpy-scalar. - y = numpy.asarray(y) - elif dtype1 != dtype2: - is_array_arg1 = not xp.isscalar(arg1) - is_array_arg2 = not xp.isscalar(arg2) + if xp is cupy and dtype1 != dtype2 and not self.use_dtype: + is_array_arg1 = not xp.isscalar(arg1) + is_array_arg2 = not xp.isscalar(arg2) - is_int_float = lambda _x, _y: numpy.issubdtype(_x, numpy.integer) and numpy.issubdtype(_y, numpy.floating) - is_same_type = lambda _x, _y, _type: numpy.issubdtype(_x, _type) and numpy.issubdtype(_y, _type) + is_int_float = lambda _x, _y: numpy.issubdtype(_x, numpy.integer) and numpy.issubdtype(_y, numpy.floating) + is_same_type = lambda _x, _y, _type: numpy.issubdtype(_x, _type) and numpy.issubdtype(_y, _type) + if self.name in ('add', 'multiply', 'subtract'): if is_array_arg1 and is_array_arg2: # If both inputs are arrays where one is of floating type and another - integer, # NumPy will return an output array of always "float64" type, @@ -170,6 +168,13 @@ def check_binary(self, xp): y = y.astype(dtype1) elif is_array_arg2 and not is_array_arg1: y = y.astype(dtype2) + elif self.name in ('divide', 'true_divide'): + # If one input is an array of float32 and another - an integer or floating scalar, + # NumPy will return an output array of float32, while DPNP will return the array of float64, + # since NumPy would use the same float64 type if an array of integer or floating type were passed instead of the scalar.
+ if not (is_array_arg1 and is_array_arg2): + if (is_array_arg1 and arg1.dtype == numpy.float32) ^ (is_array_arg2 and arg2.dtype == numpy.float32): + y = y.astype(numpy.float32) # NumPy returns different values (nan/inf) on division by zero # depending on the architecture. @@ -187,7 +192,6 @@ def check_binary(self, xp): @testing.gpu @testing.parameterize(*( testing.product({ - # TODO(unno): boolean subtract causes DeprecationWarning in numpy>=1.13 'arg1': [testing.shaped_arange((2, 3), numpy, dtype=d) for d in all_types ] + [0, 0.0, 2, 2.0], @@ -282,7 +286,6 @@ def test_modf(self, xp, dtype): 'xp': [numpy, cupy], 'shape': [(3, 2), (), (3, 0, 2)] })) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.gpu class TestBoolSubtract(unittest.TestCase): diff --git a/tests/third_party/cupy/random_tests/test_sample.py b/tests/third_party/cupy/random_tests/test_sample.py index 3f8a0169ac12..f3b844cdc6a5 100644 --- a/tests/third_party/cupy/random_tests/test_sample.py +++ b/tests/third_party/cupy/random_tests/test_sample.py @@ -33,7 +33,6 @@ def test_lo_hi_nonrandom(self): a = random.randint(-1.1, -0.9, size=(2, 2)) numpy.testing.assert_array_equal(a, cupy.full((2, 2), -1)) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_zero_sizes(self): a = random.randint(10, size=(0,)) numpy.testing.assert_array_equal(a, cupy.array(())) @@ -112,7 +111,6 @@ def test_goodness_of_fit_2(self): self.assertTrue(hypothesis.chi_square_test(counts, expected)) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.gpu class TestRandintDtype(unittest.TestCase): diff --git a/tests/third_party/cupy/statistics_tests/test_meanvar.py b/tests/third_party/cupy/statistics_tests/test_meanvar.py index aea22d02c511..60d3413b0daa 100644 --- a/tests/third_party/cupy/statistics_tests/test_meanvar.py +++ b/tests/third_party/cupy/statistics_tests/test_meanvar.py @@ -89,7 +89,6 @@ def test_median_axis_sequence(self, xp, dtype): return xp.median(a, self.axis, keepdims=self.keepdims) 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.gpu class TestAverage(unittest.TestCase): @@ -101,12 +100,14 @@ def test_average_all(self, xp, dtype): a = testing.shaped_arange((2, 3), xp, dtype) return xp.average(a) + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes() @testing.numpy_cupy_allclose() def test_average_axis(self, xp, dtype): a = testing.shaped_arange((2, 3, 4), xp, dtype) return xp.average(a, axis=1) + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes() @testing.numpy_cupy_allclose() def test_average_weights(self, xp, dtype): @@ -114,6 +115,7 @@ def test_average_weights(self, xp, dtype): w = testing.shaped_arange((2, 3), xp, dtype) return xp.average(a, weights=w) + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes() @testing.numpy_cupy_allclose() def test_average_axis_weights(self, xp, dtype): @@ -132,6 +134,7 @@ def check_returned(self, a, axis, weights): testing.assert_allclose(average_cpu, average_gpu) testing.assert_allclose(sum_weights_cpu, sum_weights_gpu) + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes() def test_returned(self, dtype): a = testing.shaped_arange((2, 3), numpy, dtype) diff --git a/tests_external/skipped_tests_numpy.tbl b/tests_external/skipped_tests_numpy.tbl index 30b66da5e663..c2c0dc78ec54 100644 --- a/tests_external/skipped_tests_numpy.tbl +++ b/tests_external/skipped_tests_numpy.tbl @@ -318,83 +318,6 @@ tests/test_datetime.py::TestDateTime::test_timedelta_np_int_construction[Y] tests/test_datetime.py::TestDateTime::test_timedelta_object_array_conversion tests/test_datetime.py::TestDateTime::test_timedelta_scalar_construction tests/test_datetime.py::TestDateTime::test_timedelta_scalar_construction_units -tests/test_defchararray.py::TestBasic::test_from_object_array -tests/test_defchararray.py::TestBasic::test_from_object_array_unicode -tests/test_defchararray.py::TestBasic::test_from_string 
-tests/test_defchararray.py::TestBasic::test_from_string_array -tests/test_defchararray.py::TestBasic::test_from_unicode -tests/test_defchararray.py::TestBasic::test_from_unicode_array -tests/test_defchararray.py::TestBasic::test_unicode_upconvert -tests/test_defchararray.py::TestChar::test_it -tests/test_defchararray.py::TestComparisonsMixed1::test_equal -tests/test_defchararray.py::TestComparisonsMixed1::test_greater -tests/test_defchararray.py::TestComparisonsMixed1::test_greater_equal -tests/test_defchararray.py::TestComparisonsMixed1::test_less -tests/test_defchararray.py::TestComparisonsMixed1::test_less_equal -tests/test_defchararray.py::TestComparisonsMixed1::test_not_equal -tests/test_defchararray.py::TestComparisonsMixed2::test_equal -tests/test_defchararray.py::TestComparisonsMixed2::test_greater -tests/test_defchararray.py::TestComparisonsMixed2::test_greater_equal -tests/test_defchararray.py::TestComparisonsMixed2::test_less -tests/test_defchararray.py::TestComparisonsMixed2::test_less_equal -tests/test_defchararray.py::TestComparisonsMixed2::test_not_equal -tests/test_defchararray.py::TestComparisons::test_equal -tests/test_defchararray.py::TestComparisons::test_greater -tests/test_defchararray.py::TestComparisons::test_greater_equal -tests/test_defchararray.py::TestComparisons::test_less -tests/test_defchararray.py::TestComparisons::test_less_equal -tests/test_defchararray.py::TestComparisons::test_not_equal -tests/test_defchararray.py::test_empty_indexing -tests/test_defchararray.py::TestInformation::test_count -tests/test_defchararray.py::TestInformation::test_endswith -tests/test_defchararray.py::TestInformation::test_find -tests/test_defchararray.py::TestInformation::test_index -tests/test_defchararray.py::TestInformation::test_isalnum -tests/test_defchararray.py::TestInformation::test_isalpha -tests/test_defchararray.py::TestInformation::test_isdigit -tests/test_defchararray.py::TestInformation::test_islower 
-tests/test_defchararray.py::TestInformation::test_isspace -tests/test_defchararray.py::TestInformation::test_istitle -tests/test_defchararray.py::TestInformation::test_isupper -tests/test_defchararray.py::TestInformation::test_len -tests/test_defchararray.py::TestInformation::test_rfind -tests/test_defchararray.py::TestInformation::test_rindex -tests/test_defchararray.py::TestInformation::test_startswith -tests/test_defchararray.py::TestMethods::test_capitalize -tests/test_defchararray.py::TestMethods::test_center -tests/test_defchararray.py::TestMethods::test_decode -tests/test_defchararray.py::TestMethods::test_encode -tests/test_defchararray.py::TestMethods::test_expandtabs -tests/test_defchararray.py::TestMethods::test_isdecimal -tests/test_defchararray.py::TestMethods::test_isnumeric -tests/test_defchararray.py::TestMethods::test_join -tests/test_defchararray.py::TestMethods::test_ljust -tests/test_defchararray.py::TestMethods::test_lower -tests/test_defchararray.py::TestMethods::test_lstrip -tests/test_defchararray.py::TestMethods::test_partition -tests/test_defchararray.py::TestMethods::test_replace -tests/test_defchararray.py::TestMethods::test_rjust -tests/test_defchararray.py::TestMethods::test_rpartition -tests/test_defchararray.py::TestMethods::test_rsplit -tests/test_defchararray.py::TestMethods::test_rstrip -tests/test_defchararray.py::TestMethods::test_split -tests/test_defchararray.py::TestMethods::test_splitlines -tests/test_defchararray.py::TestMethods::test_strip -tests/test_defchararray.py::TestMethods::test_swapcase -tests/test_defchararray.py::TestMethods::test_title -tests/test_defchararray.py::TestMethods::test_upper -tests/test_defchararray.py::TestOperations::test_add -tests/test_defchararray.py::TestOperations::test_mod -tests/test_defchararray.py::TestOperations::test_mul -tests/test_defchararray.py::TestOperations::test_radd -tests/test_defchararray.py::TestOperations::test_rmod -tests/test_defchararray.py::TestOperations::test_rmul 
-tests/test_defchararray.py::TestOperations::test_slice -tests/test_defchararray.py::TestVecString::test_invalid_args_tuple -tests/test_defchararray.py::TestVecString::test_invalid_function_args -tests/test_defchararray.py::TestVecString::test_invalid_result_type -tests/test_defchararray.py::TestVecString::test_non_string_array -tests/test_defchararray.py::TestWhitespace::test1 tests/test_deprecations.py::TestAlen::test_alen tests/test_deprecations.py::TestArrayDataAttributeAssignmentDeprecation::test_data_attr_assignment tests/test_deprecations.py::TestBinaryReprInsufficientWidthParameterForRepresentation::test_insufficient_width_negative diff --git a/utils/command_build_clib.py b/utils/command_build_clib.py index 95887cc65aaa..d16bab3aec4a 100644 --- a/utils/command_build_clib.py +++ b/utils/command_build_clib.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2022, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -63,7 +63,7 @@ # default variables (for Linux) _project_compiler = "icpx" _project_linker = "icpx" -_project_cmplr_flag_sycl_devel = ["-fsycl-device-code-split=per_kernel", "-fno-approx-func"] +_project_cmplr_flag_sycl_devel = ["-fsycl-device-code-split=per_kernel", "-fno-approx-func", "-fno-finite-math-only"] _project_cmplr_flag_sycl = ["-fsycl"] _project_cmplr_flag_stdcpp_static = [] # This brakes TBB ["-static-libstdc++", "-static-libgcc"] _project_cmplr_flag_compatibility = ["-Wl,--enable-new-dtags"]