Implement dpnp.cov() through existing dpnp methods

antonwolfy · antonwolfy · commit 5b7ba6db83ec · 2023-05-11T06:26:19.000-05:00
diff --git a/dpnp/dpnp_iface_statistics.py b/dpnp/dpnp_iface_statistics.py
@@ -44,6 +44,9 @@
 import dpctl.tensor as dpt
 from dpnp.dpnp_algo import *
 from dpnp.dpnp_utils import *
+from dpnp.dpnp_utils.dpnp_utils_statistics import (
+    dpnp_cov
+)
 from dpnp.dpnp_array import dpnp_array
 import dpnp
 
@@ -237,13 +240,18 @@ def correlate(x1, x2, mode='valid'):
     return call_origin(numpy.correlate, x1, x2, mode=mode)
 
 
-def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=None):
-    """cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=None):
+def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=None, *, dtype=None):
+    """cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=None, *, dtype=None):
 
     Estimate a covariance matrix, given data and weights.
 
     For full documentation refer to :obj:`numpy.cov`.
 
+    Returns
+    -------
+    out : dpnp.ndarray
+        The covariance matrix of the variables.
+
     Limitations
     -----------
     Input array ``m`` is supported as :obj:`dpnp.ndarray`.
@@ -257,7 +265,9 @@ def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=
     Otherwise the function will be executed sequentially on CPU.
     Input array data types are limited by supported DPNP :ref:`Data types`.
 
-    .. see also:: :obj:`dpnp.corrcoef` normalized covariance matrix.
+    See Also
+    --------
+    :obj:`dpnp.corrcoef` : Normalized covariance matrix
 
     Examples
     --------
@@ -274,11 +284,10 @@ def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=
     [1.0, -1.0, -1.0, 1.0]
 
     """
-    if not isinstance(x1, (dpnp_array, dpt.usm_ndarray)):
-        pass
-    elif x1.ndim > 2:
+
+    if not isinstance(m, (dpnp_array, dpt.usm_ndarray)):
         pass
-    elif y is not None:
+    elif m.ndim > 2:
         pass
     elif bias:
         pass
@@ -289,18 +298,13 @@ def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=
     elif aweights is not None:
         pass
     else:
-        if not rowvar and x1.shape[0] != 1:
-            x1 = x1.get_array() if isinstance(x1, dpnp_array) else x1
-            x1 = dpnp_array._create_from_usm_ndarray(x1.mT)
-
-        if not x1.dtype in (dpnp.float32, dpnp.float64):
-            x1 = dpnp.astype(x1, dpnp.default_float_type(sycl_queue=x1.sycl_queue))
+        return dpnp_cov(m, y=y, rowvar=rowvar, dtype=dtype)
 
-        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
-        if x1_desc:
-            return dpnp_cov(x1_desc).get_pyobj()
+        # x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
+        # if x1_desc:
+        #     return dpnp_cov(x1_desc).get_pyobj()
 
-    return call_origin(numpy.cov, x1, y, rowvar, bias, ddof, fweights, aweights)
+    return call_origin(numpy.cov, m, y, rowvar, bias, ddof, fweights, aweights, dtype=dtype)
 
 
 def histogram(a, bins=10, range=None, density=None, weights=None):
diff --git a/dpnp/dpnp_utils/dpnp_utils_statistics.py b/dpnp/dpnp_utils/dpnp_utils_statistics.py
@@ -0,0 +1,130 @@
+# cython: language_level=3
+# distutils: language = c++
+# -*- coding: utf-8 -*-
+# *****************************************************************************
+# Copyright (c) 2023, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# - Redistributions of source code must retain the above copyright notice,
+#   this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+
+import dpnp
+from dpnp.dpnp_array import dpnp_array
+from dpnp.dpnp_utils import (
+    get_usm_allocations
+)
+
+import dpctl
+import dpctl.tensor as dpt
+import dpctl.tensor._tensor_impl as ti
+
+# TODO: replace with calls from dpctl module
+import unary_fns
+
+
+__all__ = [
+    "dpnp_cov"
+]
+
+def dpnp_cov(m, y=None, rowvar=True, dtype=None):
+    """
+    Estimate a covariance matrix from the passed data; weights are not supported.
+
+    `m` (and `y`, if given, stacked below `m` along axis 0) holds variables in
+    rows when `rowvar` is true, else in columns. `dtype` is the result type;
+    if None, it is promoted from the inputs and the queue's default float type.
+    Implemented via existing dpnp and dpctl methods, not a dpnp backend call.
+    """
+
+    def _get_2dmin_array(x, dtype):
+        """
+        Transform an input array to the form required for building a covariance matrix.
+
+        If applicable, it reshapes the input array to have 2 dimensions or greater.
+        If applicable, it transposes the input array when 'rowvar' is False.
+        It casts to another dtype, if the input array differs from requested one.
+
+        """
+
+        if x.ndim == 0:
+            x = x.reshape((1, 1))
+        elif x.ndim == 1:  # test the array being converted, not the outer 'm'
+            x = x[dpnp.newaxis, :]
+
+        if not rowvar and x.shape[0] != 1:
+            # TODO: replace once ready with
+            # x = x.T
+            x = dpnp_array._create_from_usm_ndarray(x.get_array().T)
+
+        if x.dtype != dtype:
+            x = dpnp.astype(x, dtype)
+        return x
+
+
+    # input arrays must follow the compute-follows-data (CFD) paradigm
+    usm_type, queue = get_usm_allocations((m, ) if y is None else (m, y))
+
+    # calculate a type of result array if not passed explicitly
+    if dtype is None:
+        dtypes = [m.dtype, dpnp.default_float_type(sycl_queue=queue)]
+        if y is not None:
+            dtypes.append(y.dtype)
+        dtype = dpt.result_type(*dtypes)
+
+    X = _get_2dmin_array(m, dtype)
+    if y is not None:
+        y = _get_2dmin_array(y, dtype)
+
+        # TODO: replace with dpnp.concatenate((X, y), axis=0) once dpctl implementation is ready
+        if X.ndim != y.ndim:
+            raise ValueError("all the input arrays must have same number of dimensions")
+
+        if X.shape[1:] != y.shape[1:]:
+            raise ValueError("all the input array dimensions for the concatenation axis must match exactly")
+
+        res_shape = tuple(X.shape[i] if i > 0 else (X.shape[i] + y.shape[i]) for i in range(X.ndim))
+        res_usm = dpt.empty(res_shape, dtype=dtype, usm_type=usm_type, sycl_queue=queue)
+
+        # concatenate input arrays 'm' and 'y' into single array among 0-axis
+        hev1, _ = ti._copy_usm_ndarray_into_usm_ndarray(src=X.get_array(), dst=res_usm[:X.shape[0]], sycl_queue=queue)
+        hev2, _ = ti._copy_usm_ndarray_into_usm_ndarray(src=y.get_array(), dst=res_usm[X.shape[0]:], sycl_queue=queue)
+        dpctl.SyclEvent.wait_for([hev1, hev2])
+
+        X = dpnp_array._create_from_usm_ndarray(res_usm)
+
+    # TODO: replace with avg = X.mean(axis=1) once ready
+    avg = unary_fns.sum(X.get_array(), axis=1) / X.shape[1]
+
+    fact = X.shape[1] - 1
+    # subtract out of place: X may alias the caller's array (no reshape,
+    # transpose, or cast copies it), so "X -= ..." would corrupt the input
+    X = X - avg[:, None]
+
+    # TODO: replace with c = dpnp.dot(X, X.T.conj()) once ready
+    c = dpnp.dot(X, dpnp_array._create_from_usm_ndarray(X.get_array().T).conj())
+    # fewer than two observations leave the estimate undefined -> NaN
+    c *= 1 / fact if fact > 0 else dpnp.nan
+
+    # TODO: replace with dpnp.squeeze(c) once ready
+    usm_c = dpnp.get_usm_ndarray(c)
+    usm_c = dpt.squeeze(usm_c)
+    return dpnp_array._create_from_usm_ndarray(usm_c)
diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl
@@ -271,7 +271,7 @@ tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{extern
 tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_invalid_negative_kth
 tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{external=True, length=10}::test_partition_axis
 tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{external=True, length=10}::test_partition_negative_axis
-tests/third_party/cupy/statistics_tests/test_correlation.py::TestCov::test_cov_empty
+
 tests/third_party/cupy/statistics_tests/test_meanvar.py::TestMeanVar::test_external_mean_axis
 
 tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_axis
diff --git a/tests/third_party/cupy/statistics_tests/test_correlation.py b/tests/third_party/cupy/statistics_tests/test_correlation.py
@@ -1,9 +1,11 @@
+import sys
 import unittest
 
 import numpy
 import pytest
 
 import dpnp as cupy
+from dpctl import select_default_device
 from tests.third_party.cupy import testing
 
 
@@ -37,9 +39,11 @@ def test_corrcoef_rowvar(self, xp, dtype):
         return xp.corrcoef(a, y=y, rowvar=False)
 
 
-@testing.gpu
 class TestCov(unittest.TestCase):
 
+    # the resulting dtype differs from numpy's when the default device lacks fp64 support
+    _has_fp64 = select_default_device().has_aspect_fp64
+
     def generate_input(self, a_shape, y_shape, xp, dtype):
         a = testing.shaped_arange(a_shape, xp, dtype)
         y = None
@@ -48,27 +52,40 @@ def generate_input(self, a_shape, y_shape, xp, dtype):
         return a, y
 
     @testing.for_all_dtypes()
-    @testing.numpy_cupy_allclose(type_check=False)
+    @testing.numpy_cupy_allclose(type_check=_has_fp64, accept_error=True)
     def check(self, a_shape, y_shape=None, rowvar=True, bias=False,
-              ddof=None, xp=None, dtype=None):
+              ddof=None, xp=None, dtype=None,
+              fweights=None, aweights=None, name=None):
         a, y = self.generate_input(a_shape, y_shape, xp, dtype)
-        return xp.cov(a, y, rowvar, bias, ddof)
+        if fweights is not None:
+            fweights = name.asarray(fweights)
+        if aweights is not None:
+            aweights = name.asarray(aweights)
+        # print(type(fweights))
+        # return xp.cov(a, y, rowvar, bias, ddof,
+        #               fweights, aweights, dtype=dtype)
+        return xp.cov(a, y, rowvar, bias, ddof,
+                      fweights, aweights)
 
     @testing.for_all_dtypes()
-    @testing.numpy_cupy_allclose()
+    @testing.numpy_cupy_allclose(accept_error=True)
     def check_warns(self, a_shape, y_shape=None, rowvar=True, bias=False,
-                    ddof=None, xp=None, dtype=None):
+                    ddof=None, xp=None, dtype=None,
+                    fweights=None, aweights=None):
         with testing.assert_warns(RuntimeWarning):
             a, y = self.generate_input(a_shape, y_shape, xp, dtype)
-            return xp.cov(a, y, rowvar, bias, ddof)
+            return xp.cov(a, y, rowvar, bias, ddof,
+                          fweights, aweights, dtype=dtype)
 
     @testing.for_all_dtypes()
-    def check_raises(self, a_shape, y_shape=None, rowvar=True, bias=False,
-                     ddof=None, dtype=None):
+    def check_raises(self, a_shape, y_shape=None,
+                     rowvar=True, bias=False, ddof=None,
+                     dtype=None, fweights=None, aweights=None):
         for xp in (numpy, cupy):
             a, y = self.generate_input(a_shape, y_shape, xp, dtype)
             with pytest.raises(ValueError):
-                xp.cov(a, y, rowvar, bias, ddof)
+                xp.cov(a, y, rowvar, bias, ddof,
+                       fweights, aweights, dtype=dtype)
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_cov(self):
@@ -78,6 +95,12 @@ def test_cov(self):
         self.check((2, 3), (2, 3), rowvar=False)
         self.check((2, 3), bias=True)
         self.check((2, 3), ddof=2)
+        self.check((2, 3))
+        self.check((1, 3), fweights=(1, 4, 1))
+        self.check((1, 3), aweights=(1.0, 4.0, 1.0))
+        self.check((1, 3), bias=True, aweights=(1.0, 4.0, 1.0))
+        self.check((1, 3), fweights=(1, 4, 1),
+                   aweights=(1.0, 4.0, 1.0))
 
     @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_cov_warns(self):
diff --git a/tests/third_party/cupy/testing/helper.py b/tests/third_party/cupy/testing/helper.py