Skip to content

Commit f5e6425

Browse files
authored
Implement dpnp.cov() though existing dpnp methods (#1396)
* Implement dpnp.cov() though existing dpnp methods * Applied review comments * Clean up the code to get rid of todo * use dpnp.mean()
1 parent 5a3f438 commit f5e6425

File tree

9 files changed

+233
-125
lines changed

9 files changed

+233
-125
lines changed

dpnp/backend/include/dpnp_iface_fptr.hpp

-1
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,6 @@ enum class DPNPFuncName : size_t
124124
DPNP_FN_COUNT_NONZERO, /**< Used in numpy.count_nonzero() impl */
125125
DPNP_FN_COUNT_NONZERO_EXT, /**< Used in numpy.count_nonzero() impl, requires extra parameters */
126126
DPNP_FN_COV, /**< Used in numpy.cov() impl */
127-
DPNP_FN_COV_EXT, /**< Used in numpy.cov() impl, requires extra parameters */
128127
DPNP_FN_CROSS, /**< Used in numpy.cross() impl */
129128
DPNP_FN_CROSS_EXT, /**< Used in numpy.cross() impl, requires extra parameters */
130129
DPNP_FN_CUMPROD, /**< Used in numpy.cumprod() impl */

dpnp/backend/kernels/dpnp_krnl_statistics.cpp

-13
Original file line numberDiff line numberDiff line change
@@ -243,14 +243,6 @@ void dpnp_cov_c(void* array1_in, void* result1, size_t nrows, size_t ncols)
243243
template <typename _DataType>
244244
void (*dpnp_cov_default_c)(void*, void*, size_t, size_t) = dpnp_cov_c<_DataType>;
245245

246-
template <typename _DataType>
247-
DPCTLSyclEventRef (*dpnp_cov_ext_c)(DPCTLSyclQueueRef,
248-
void*,
249-
void*,
250-
size_t,
251-
size_t,
252-
const DPCTLEventVectorRef) = dpnp_cov_c<_DataType>;
253-
254246
template <typename _DataType_input, typename _DataType_output>
255247
DPCTLSyclEventRef dpnp_count_nonzero_c(DPCTLSyclQueueRef q_ref,
256248
void* array1_in,
@@ -1373,11 +1365,6 @@ void func_map_init_statistics(func_map_t& fmap)
13731365
fmap[DPNPFuncName::DPNP_FN_COV][eft_FLT][eft_FLT] = {eft_DBL, (void*)dpnp_cov_default_c<double>};
13741366
fmap[DPNPFuncName::DPNP_FN_COV][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_cov_default_c<double>};
13751367

1376-
fmap[DPNPFuncName::DPNP_FN_COV_EXT][eft_INT][eft_INT] = {eft_DBL, (void*)dpnp_cov_ext_c<double>};
1377-
fmap[DPNPFuncName::DPNP_FN_COV_EXT][eft_LNG][eft_LNG] = {eft_DBL, (void*)dpnp_cov_ext_c<double>};
1378-
fmap[DPNPFuncName::DPNP_FN_COV_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_cov_ext_c<float>};
1379-
fmap[DPNPFuncName::DPNP_FN_COV_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_cov_ext_c<double>};
1380-
13811368
fmap[DPNPFuncName::DPNP_FN_MAX][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_max_default_c<int32_t>};
13821369
fmap[DPNPFuncName::DPNP_FN_MAX][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_max_default_c<int64_t>};
13831370
fmap[DPNPFuncName::DPNP_FN_MAX][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_max_default_c<float>};

dpnp/dpnp_algo/dpnp_algo.pxd

-3
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,6 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na
9595
DPNP_FN_COS_EXT
9696
DPNP_FN_COSH
9797
DPNP_FN_COSH_EXT
98-
DPNP_FN_COV
99-
DPNP_FN_COV_EXT
10098
DPNP_FN_COUNT_NONZERO
10199
DPNP_FN_COUNT_NONZERO_EXT
102100
DPNP_FN_CROSS
@@ -538,7 +536,6 @@ cpdef dpnp_descriptor dpnp_repeat(dpnp_descriptor array1, repeats, axes=*)
538536
"""
539537
Statistics functions
540538
"""
541-
cpdef dpnp_descriptor dpnp_cov(dpnp_descriptor array1)
542539
cpdef dpnp_descriptor dpnp_min(dpnp_descriptor a, axis)
543540

544541

dpnp/dpnp_algo/dpnp_algo_statistics.pxi

-44
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ and the rest of the library
3838
__all__ += [
3939
"dpnp_average",
4040
"dpnp_correlate",
41-
"dpnp_cov",
4241
"dpnp_max",
4342
"dpnp_median",
4443
"dpnp_min",
@@ -178,49 +177,6 @@ cpdef utils.dpnp_descriptor dpnp_correlate(utils.dpnp_descriptor x1, utils.dpnp_
178177
return result
179178

180179

181-
cpdef utils.dpnp_descriptor dpnp_cov(utils.dpnp_descriptor array1):
182-
cdef shape_type_c input_shape = array1.shape
183-
184-
if array1.ndim == 1:
185-
input_shape.insert(input_shape.begin(), 1)
186-
187-
# convert string type names (array.dtype) to C enum DPNPFuncType
188-
cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(array1.dtype)
189-
190-
# get the FPTR data structure
191-
cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_COV_EXT, param1_type, param1_type)
192-
193-
array1_obj = array1.get_array()
194-
195-
# ceate result array with type given by FPTR data
196-
cdef shape_type_c result_shape = (input_shape[0], input_shape[0])
197-
cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape,
198-
kernel_data.return_type,
199-
None,
200-
device=array1_obj.sycl_device,
201-
usm_type=array1_obj.usm_type,
202-
sycl_queue=array1_obj.sycl_queue)
203-
204-
result_sycl_queue = result.get_array().sycl_queue
205-
206-
cdef c_dpctl.SyclQueue q = <c_dpctl.SyclQueue> result_sycl_queue
207-
cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
208-
209-
cdef fptr_custom_cov_1in_1out_t func = <fptr_custom_cov_1in_1out_t > kernel_data.ptr
210-
# call FPTR function
211-
cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
212-
array1.get_data(),
213-
result.get_data(),
214-
input_shape[0],
215-
input_shape[1],
216-
NULL) # dep_events_ref
217-
218-
with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
219-
c_dpctl.DPCTLEvent_Delete(event_ref)
220-
221-
return result
222-
223-
224180
cdef utils.dpnp_descriptor _dpnp_max(utils.dpnp_descriptor x1, _axis_, shape_type_c result_shape):
225181
cdef shape_type_c x1_shape = x1.shape
226182
cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype)

dpnp/dpnp_iface_statistics.py

+18-17
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@
4545
from numpy.core.numeric import normalize_axis_tuple
4646
from dpnp.dpnp_algo import *
4747
from dpnp.dpnp_utils import *
48+
from dpnp.dpnp_utils.dpnp_utils_statistics import (
49+
dpnp_cov
50+
)
4851
from dpnp.dpnp_array import dpnp_array
4952
import dpnp
5053

@@ -238,13 +241,18 @@ def correlate(x1, x2, mode='valid'):
238241
return call_origin(numpy.correlate, x1, x2, mode=mode)
239242

240243

241-
def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=None):
242-
"""cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=None):
244+
def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=None, *, dtype=None):
245+
"""cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=None, *, dtype=None):
243246
244247
Estimate a covariance matrix, given data and weights.
245248
246249
For full documentation refer to :obj:`numpy.cov`.
247250
251+
Returns
252+
-------
253+
out : dpnp.ndarray
254+
The covariance matrix of the variables.
255+
248256
Limitations
249257
-----------
250258
Input array ``m`` is supported as :obj:`dpnp.ndarray`.
@@ -258,7 +266,9 @@ def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=
258266
Otherwise the function will be executed sequentially on CPU.
259267
Input array data types are limited by supported DPNP :ref:`Data types`.
260268
261-
.. see also:: :obj:`dpnp.corrcoef` normalized covariance matrix.
269+
See Also
270+
--------
271+
:obj:`dpnp.corrcoef` : Normalized covariance matrix
262272
263273
Examples
264274
--------
@@ -275,11 +285,10 @@ def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=
275285
[1.0, -1.0, -1.0, 1.0]
276286
277287
"""
278-
if not isinstance(x1, (dpnp_array, dpt.usm_ndarray)):
279-
pass
280-
elif x1.ndim > 2:
288+
289+
if not isinstance(m, (dpnp_array, dpt.usm_ndarray)):
281290
pass
282-
elif y is not None:
291+
elif m.ndim > 2:
283292
pass
284293
elif bias:
285294
pass
@@ -290,17 +299,9 @@ def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=
290299
elif aweights is not None:
291300
pass
292301
else:
293-
if not rowvar and x1.shape[0] != 1:
294-
x1 = x1.T
295-
296-
if not x1.dtype in (dpnp.float32, dpnp.float64):
297-
x1 = dpnp.astype(x1, dpnp.default_float_type(sycl_queue=x1.sycl_queue))
298-
299-
x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
300-
if x1_desc:
301-
return dpnp_cov(x1_desc).get_pyobj()
302+
return dpnp_cov(m, y=y, rowvar=rowvar, dtype=dtype)
302303

303-
return call_origin(numpy.cov, x1, y, rowvar, bias, ddof, fweights, aweights)
304+
return call_origin(numpy.cov, m, y, rowvar, bias, ddof, fweights, aweights, dtype=dtype)
304305

305306

306307
def histogram(a, bins=10, range=None, density=None, weights=None):
+117
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
# cython: language_level=3
2+
# distutils: language = c++
3+
# -*- coding: utf-8 -*-
4+
# *****************************************************************************
5+
# Copyright (c) 2023, Intel Corporation
6+
# All rights reserved.
7+
#
8+
# Redistribution and use in source and binary forms, with or without
9+
# modification, are permitted provided that the following conditions are met:
10+
# - Redistributions of source code must retain the above copyright notice,
11+
# this list of conditions and the following disclaimer.
12+
# - Redistributions in binary form must reproduce the above copyright notice,
13+
# this list of conditions and the following disclaimer in the documentation
14+
# and/or other materials provided with the distribution.
15+
#
16+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26+
# THE POSSIBILITY OF SUCH DAMAGE.
27+
# *****************************************************************************
28+
29+
30+
import dpnp
31+
from dpnp.dpnp_array import dpnp_array
32+
from dpnp.dpnp_utils import (
33+
get_usm_allocations
34+
)
35+
36+
import dpctl
37+
import dpctl.tensor as dpt
38+
import dpctl.tensor._tensor_impl as ti
39+
40+
41+
__all__ = [
42+
"dpnp_cov"
43+
]
44+
45+
def dpnp_cov(m, y=None, rowvar=True, dtype=None):
46+
"""
47+
Estimate a covariance matrix based on passed data.
48+
No support for given weights is provided now.
49+
50+
The implementation is done through existing dpnp and dpctl methods
51+
instead of separate function call of dpnp backend.
52+
53+
"""
54+
55+
def _get_2dmin_array(x, dtype):
56+
"""
57+
Transform an input array to a form required for building a covariance matrix.
58+
59+
If applicable, it reshapes the input array to have 2 dimensions or greater.
60+
If applicable, it transposes the input array when 'rowvar' is False.
61+
It casts to another dtype, if the input array differs from requested one.
62+
63+
"""
64+
65+
if x.ndim == 0:
66+
x = x.reshape((1, 1))
67+
elif x.ndim == 1:
68+
x = x[dpnp.newaxis, :]
69+
70+
if not rowvar and x.shape[0] != 1:
71+
x = x.T
72+
73+
if x.dtype != dtype:
74+
x = dpnp.astype(x, dtype)
75+
return x
76+
77+
78+
# input arrays must follow CFD paradigm
79+
usm_type, queue = get_usm_allocations((m, ) if y is None else (m, y))
80+
81+
# calculate a type of result array if not passed explicitly
82+
if dtype is None:
83+
dtypes = [m.dtype, dpnp.default_float_type(sycl_queue=queue)]
84+
if y is not None:
85+
dtypes.append(y.dtype)
86+
dtype = dpt.result_type(*dtypes)
87+
88+
X = _get_2dmin_array(m, dtype)
89+
if y is not None:
90+
y = _get_2dmin_array(y, dtype)
91+
92+
# TODO: replace with dpnp.concatenate((X, y), axis=0) once dpctl implementation is ready
93+
if X.ndim != y.ndim:
94+
raise ValueError("all the input arrays must have same number of dimensions")
95+
96+
if X.shape[1:] != y.shape[1:]:
97+
raise ValueError("all the input array dimensions for the concatenation axis must match exactly")
98+
99+
res_shape = tuple(X.shape[i] if i > 0 else (X.shape[i] + y.shape[i]) for i in range(X.ndim))
100+
res_usm = dpt.empty(res_shape, dtype=dtype, usm_type=usm_type, sycl_queue=queue)
101+
102+
# concatenate input arrays 'm' and 'y' into single array among 0-axis
103+
hev1, _ = ti._copy_usm_ndarray_into_usm_ndarray(src=X.get_array(), dst=res_usm[:X.shape[0]], sycl_queue=queue)
104+
hev2, _ = ti._copy_usm_ndarray_into_usm_ndarray(src=y.get_array(), dst=res_usm[X.shape[0]:], sycl_queue=queue)
105+
dpctl.SyclEvent.wait_for([hev1, hev2])
106+
107+
X = dpnp_array._create_from_usm_ndarray(res_usm)
108+
109+
avg = X.mean(axis=1)
110+
111+
fact = X.shape[1] - 1
112+
X -= avg[:, None]
113+
114+
c = dpnp.dot(X, X.T.conj())
115+
c *= 1 / fact if fact != 0 else dpnp.nan
116+
117+
return dpnp.squeeze(c)

tests/skipped_tests_gpu.tbl

-2
Original file line numberDiff line numberDiff line change
@@ -252,8 +252,6 @@ tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsMult
252252
tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsMultivariateNormal_param_2_{d=4, shape=(4, 3, 2)}::test_normal
253253
tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsMultivariateNormal_param_3_{d=4, shape=(3, 2)}::test_normal
254254

255-
tests/third_party/cupy/statistics_tests/test_correlation.py::TestCov::test_cov_empty
256-
257255
tests/third_party/intel/test_zero_copy_test1.py::test_dpnp_interaction_with_dpctl_memory
258256
tests/test_arraymanipulation.py::TestHstack::test_generator
259257
tests/test_arraymanipulation.py::TestVstack::test_generator

0 commit comments

Comments
 (0)