Implement dpnp.cov() through existing dpnp methods (#1396)

antonwolfy · web-flow · commit f5e642546afc · 2023-06-15T09:20:54.000+02:00
* Implement dpnp.cov() through existing dpnp methods

* Applied review comments

* Clean up the code to get rid of todo

* use dpnp.mean()
diff --git a/dpnp/backend/include/dpnp_iface_fptr.hpp b/dpnp/backend/include/dpnp_iface_fptr.hpp
@@ -124,7 +124,6 @@ enum class DPNPFuncName : size_t
     DPNP_FN_COUNT_NONZERO,                /**< Used in numpy.count_nonzero() impl  */
     DPNP_FN_COUNT_NONZERO_EXT,            /**< Used in numpy.count_nonzero() impl, requires extra parameters */
     DPNP_FN_COV,                          /**< Used in numpy.cov() impl  */
-    DPNP_FN_COV_EXT,                      /**< Used in numpy.cov() impl, requires extra parameters */
     DPNP_FN_CROSS,                        /**< Used in numpy.cross() impl  */
     DPNP_FN_CROSS_EXT,                    /**< Used in numpy.cross() impl, requires extra parameters */
     DPNP_FN_CUMPROD,                      /**< Used in numpy.cumprod() impl  */
diff --git a/dpnp/backend/kernels/dpnp_krnl_statistics.cpp b/dpnp/backend/kernels/dpnp_krnl_statistics.cpp
@@ -243,14 +243,6 @@ void dpnp_cov_c(void* array1_in, void* result1, size_t nrows, size_t ncols)
 template <typename _DataType>
 void (*dpnp_cov_default_c)(void*, void*, size_t, size_t) = dpnp_cov_c<_DataType>;
 
-template <typename _DataType>
-DPCTLSyclEventRef (*dpnp_cov_ext_c)(DPCTLSyclQueueRef,
-                                    void*,
-                                    void*,
-                                    size_t,
-                                    size_t,
-                                    const DPCTLEventVectorRef) = dpnp_cov_c<_DataType>;
-
 template <typename _DataType_input, typename _DataType_output>
 DPCTLSyclEventRef dpnp_count_nonzero_c(DPCTLSyclQueueRef q_ref,
                                        void* array1_in,
@@ -1373,11 +1365,6 @@ void func_map_init_statistics(func_map_t& fmap)
     fmap[DPNPFuncName::DPNP_FN_COV][eft_FLT][eft_FLT] = {eft_DBL, (void*)dpnp_cov_default_c<double>};
     fmap[DPNPFuncName::DPNP_FN_COV][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_cov_default_c<double>};
 
-    fmap[DPNPFuncName::DPNP_FN_COV_EXT][eft_INT][eft_INT] = {eft_DBL, (void*)dpnp_cov_ext_c<double>};
-    fmap[DPNPFuncName::DPNP_FN_COV_EXT][eft_LNG][eft_LNG] = {eft_DBL, (void*)dpnp_cov_ext_c<double>};
-    fmap[DPNPFuncName::DPNP_FN_COV_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_cov_ext_c<float>};
-    fmap[DPNPFuncName::DPNP_FN_COV_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_cov_ext_c<double>};
-
     fmap[DPNPFuncName::DPNP_FN_MAX][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_max_default_c<int32_t>};
     fmap[DPNPFuncName::DPNP_FN_MAX][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_max_default_c<int64_t>};
     fmap[DPNPFuncName::DPNP_FN_MAX][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_max_default_c<float>};
diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd
@@ -95,8 +95,6 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName":  # need this na
         DPNP_FN_COS_EXT
         DPNP_FN_COSH
         DPNP_FN_COSH_EXT
-        DPNP_FN_COV
-        DPNP_FN_COV_EXT
         DPNP_FN_COUNT_NONZERO
         DPNP_FN_COUNT_NONZERO_EXT
         DPNP_FN_CROSS
@@ -538,7 +536,6 @@ cpdef dpnp_descriptor dpnp_repeat(dpnp_descriptor array1, repeats, axes=*)
 """
 Statistics functions
 """
-cpdef dpnp_descriptor dpnp_cov(dpnp_descriptor array1)
 cpdef dpnp_descriptor dpnp_min(dpnp_descriptor a, axis)
 
 
diff --git a/dpnp/dpnp_algo/dpnp_algo_statistics.pxi b/dpnp/dpnp_algo/dpnp_algo_statistics.pxi
@@ -38,7 +38,6 @@ and the rest of the library
 __all__ += [
     "dpnp_average",
     "dpnp_correlate",
-    "dpnp_cov",
     "dpnp_max",
     "dpnp_median",
     "dpnp_min",
@@ -178,49 +177,6 @@ cpdef utils.dpnp_descriptor dpnp_correlate(utils.dpnp_descriptor x1, utils.dpnp_
     return result
 
 
-cpdef utils.dpnp_descriptor dpnp_cov(utils.dpnp_descriptor array1):
-    cdef shape_type_c input_shape = array1.shape
-
-    if array1.ndim == 1:
-        input_shape.insert(input_shape.begin(), 1)
-
-    # convert string type names (array.dtype) to C enum DPNPFuncType
-    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(array1.dtype)
-
-    # get the FPTR data structure
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_COV_EXT, param1_type, param1_type)
-
-    array1_obj = array1.get_array()
-
-    # ceate result array with type given by FPTR data
-    cdef shape_type_c result_shape = (input_shape[0], input_shape[0])
-    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape,
-                                                                       kernel_data.return_type,
-                                                                       None,
-                                                                       device=array1_obj.sycl_device,
-                                                                       usm_type=array1_obj.usm_type,
-                                                                       sycl_queue=array1_obj.sycl_queue)
-
-    result_sycl_queue = result.get_array().sycl_queue
-
-    cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
-    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
-
-    cdef fptr_custom_cov_1in_1out_t func = <fptr_custom_cov_1in_1out_t > kernel_data.ptr
-    # call FPTR function
-    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
-                                                    array1.get_data(),
-                                                    result.get_data(),
-                                                    input_shape[0],
-                                                    input_shape[1],
-                                                    NULL)  # dep_events_ref
-
-    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
-    c_dpctl.DPCTLEvent_Delete(event_ref)
-
-    return result
-
-
 cdef utils.dpnp_descriptor _dpnp_max(utils.dpnp_descriptor x1, _axis_, shape_type_c result_shape):
     cdef shape_type_c x1_shape = x1.shape
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype)
diff --git a/dpnp/dpnp_iface_statistics.py b/dpnp/dpnp_iface_statistics.py
@@ -45,6 +45,9 @@
 from numpy.core.numeric import normalize_axis_tuple
 from dpnp.dpnp_algo import *
 from dpnp.dpnp_utils import *
+from dpnp.dpnp_utils.dpnp_utils_statistics import (
+    dpnp_cov
+)
 from dpnp.dpnp_array import dpnp_array
 import dpnp
 
@@ -238,13 +241,18 @@ def correlate(x1, x2, mode='valid'):
     return call_origin(numpy.correlate, x1, x2, mode=mode)
 
 
-def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=None):
-    """cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=None):
+def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=None, *, dtype=None):
+    """cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=None, *, dtype=None):
 
     Estimate a covariance matrix, given data and weights.
 
     For full documentation refer to :obj:`numpy.cov`.
 
+    Returns
+    -------
+    out : dpnp.ndarray
+        The covariance matrix of the variables.
+
     Limitations
     -----------
     Input array ``m`` is supported as :obj:`dpnp.ndarray`.
@@ -258,7 +266,9 @@ def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=
     Otherwise the function will be executed sequentially on CPU.
     Input array data types are limited by supported DPNP :ref:`Data types`.
 
-    .. see also:: :obj:`dpnp.corrcoef` normalized covariance matrix.
+    See Also
+    --------
+    :obj:`dpnp.corrcoef` : Normalized covariance matrix
 
     Examples
     --------
@@ -275,11 +285,10 @@ def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=
     [1.0, -1.0, -1.0, 1.0]
 
     """
-    if not isinstance(x1, (dpnp_array, dpt.usm_ndarray)):
-        pass
-    elif x1.ndim > 2:
+
+    if not isinstance(m, (dpnp_array, dpt.usm_ndarray)):
         pass
-    elif y is not None:
+    elif m.ndim > 2:
         pass
     elif bias:
         pass
@@ -290,17 +299,9 @@ def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=
     elif aweights is not None:
         pass
     else:
-        if not rowvar and x1.shape[0] != 1:
-            x1 = x1.T
-
-        if not x1.dtype in (dpnp.float32, dpnp.float64):
-            x1 = dpnp.astype(x1, dpnp.default_float_type(sycl_queue=x1.sycl_queue))
-
-        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
-        if x1_desc:
-            return dpnp_cov(x1_desc).get_pyobj()
+        return dpnp_cov(m, y=y, rowvar=rowvar, dtype=dtype)
 
-    return call_origin(numpy.cov, x1, y, rowvar, bias, ddof, fweights, aweights)
+    return call_origin(numpy.cov, m, y, rowvar, bias, ddof, fweights, aweights, dtype=dtype)
 
 
 def histogram(a, bins=10, range=None, density=None, weights=None):
diff --git a/dpnp/dpnp_utils/dpnp_utils_statistics.py b/dpnp/dpnp_utils/dpnp_utils_statistics.py
@@ -0,0 +1,117 @@
+# cython: language_level=3
+# distutils: language = c++
+# -*- coding: utf-8 -*-
+# *****************************************************************************
+# Copyright (c) 2023, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# - Redistributions of source code must retain the above copyright notice,
+#   this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+
+import dpnp
+from dpnp.dpnp_array import dpnp_array
+from dpnp.dpnp_utils import (
+    get_usm_allocations
+)
+
+import dpctl
+import dpctl.tensor as dpt
+import dpctl.tensor._tensor_impl as ti
+
+
+__all__ = [
+    "dpnp_cov"
+]
+
+def dpnp_cov(m, y=None, rowvar=True, dtype=None):
+    """
+    Estimate a covariance matrix of `m` (optionally stacked with `y`).
+    No support for given weights is provided now.
+    If `rowvar` is False, inputs with more than one row are transposed.
+    If `dtype` is None, it is deduced from the inputs and default float type.
+    The implementation is done through existing dpnp and dpctl methods
+    instead of separate function call of dpnp backend. `m` is not modified.
+    """
+
+    def _get_2dmin_array(x, dtype):
+        """
+        Transform an input array to a form required for building a covariance matrix.
+
+        If applicable, it reshapes the input array to have 2 dimensions or greater.
+        If applicable, it transposes the input array when 'rowvar' is False.
+        It casts to another dtype, if the input array differs from requested one.
+        NOTE: when no cast is needed the input array itself may be returned.
+        """
+
+        if x.ndim == 0:
+            x = x.reshape((1, 1))
+        elif x.ndim == 1:
+            x = x[dpnp.newaxis, :]
+
+        if not rowvar and x.shape[0] != 1:
+            x = x.T
+
+        if x.dtype != dtype:
+            x = dpnp.astype(x, dtype)
+        return x
+
+
+    # input arrays must follow CFD paradigm
+    usm_type, queue = get_usm_allocations((m, ) if y is None else (m, y))
+
+    # calculate a type of result array if not passed explicitly
+    if dtype is None:
+        dtypes = [m.dtype, dpnp.default_float_type(sycl_queue=queue)]
+        if y is not None:
+            dtypes.append(y.dtype)
+        dtype = dpt.result_type(*dtypes)
+
+    X = _get_2dmin_array(m, dtype)
+    if y is not None:
+        y = _get_2dmin_array(y, dtype)
+
+        # TODO: replace with dpnp.concatenate((X, y), axis=0) once dpctl implementation is ready
+        if X.ndim != y.ndim:
+            raise ValueError("all the input arrays must have same number of dimensions")
+
+        if X.shape[1:] != y.shape[1:]:
+            raise ValueError("all the input array dimensions for the concatenation axis must match exactly")
+
+        res_shape = tuple(X.shape[i] if i > 0 else (X.shape[i] + y.shape[i]) for i in range(X.ndim))
+        res_usm = dpt.empty(res_shape, dtype=dtype, usm_type=usm_type, sycl_queue=queue)
+
+        # concatenate input arrays 'm' and 'y' into single array along 0-axis
+        hev1, _ = ti._copy_usm_ndarray_into_usm_ndarray(src=X.get_array(), dst=res_usm[:X.shape[0]], sycl_queue=queue)
+        hev2, _ = ti._copy_usm_ndarray_into_usm_ndarray(src=y.get_array(), dst=res_usm[X.shape[0]:], sycl_queue=queue)
+        dpctl.SyclEvent.wait_for([hev1, hev2])
+
+        X = dpnp_array._create_from_usm_ndarray(res_usm)
+
+    avg = X.mean(axis=1)
+    # subtract out of place: X may still be the caller's array 'm' itself
+    X = X - avg[:, None]
+
+    # with no degrees of freedom left (fact <= 0) the estimate is undefined: use NaN
+    fact = X.shape[1] - 1
+    c = dpnp.dot(X, X.T.conj())
+    c *= 1 / fact if fact > 0 else dpnp.nan
+    return dpnp.squeeze(c)
diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl
@@ -252,8 +252,6 @@ tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsMult
 tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsMultivariateNormal_param_2_{d=4, shape=(4, 3, 2)}::test_normal
 tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsMultivariateNormal_param_3_{d=4, shape=(3, 2)}::test_normal
 
-tests/third_party/cupy/statistics_tests/test_correlation.py::TestCov::test_cov_empty
-
 tests/third_party/intel/test_zero_copy_test1.py::test_dpnp_interaction_with_dpctl_memory
 tests/test_arraymanipulation.py::TestHstack::test_generator
 tests/test_arraymanipulation.py::TestVstack::test_generator
diff --git a/tests/third_party/cupy/statistics_tests/test_correlation.py b/tests/third_party/cupy/statistics_tests/test_correlation.py
diff --git a/tests/third_party/cupy/testing/helper.py b/tests/third_party/cupy/testing/helper.py