diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index edb6218efac4..df81315ba486 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -22,6 +22,7 @@ env: test_dparray.py test_copy.py test_fft.py + test_histogram.py test_linalg.py test_logic.py test_manipulation.py @@ -49,6 +50,7 @@ env: third_party/cupy/math_tests third_party/cupy/sorting_tests/test_sort.py third_party/cupy/sorting_tests/test_count.py + third_party/cupy/statistics_tests/test_histogram.py third_party/cupy/statistics_tests/test_meanvar.py VER_JSON_NAME: 'version.json' VER_SCRIPT1: "import json; f = open('version.json', 'r'); j = json.load(f); f.close(); " diff --git a/doc/reference/statistics.rst b/doc/reference/statistics.rst index 6a1b14db0cb5..540b6e314a62 100644 --- a/doc/reference/statistics.rst +++ b/doc/reference/statistics.rst @@ -27,13 +27,25 @@ Averages and variances dpnp.median dpnp.average dpnp.mean - dpnp.var dpnp.std + dpnp.var dpnp.nanmean dpnp.nanvar dpnp.nanstd +Correlations +------------ + +.. autosummary:: + :toctree: generated/ + :nosignatures: + + dpnp.corrcoef + dpnp.cov + dpnp.correlate + + Histograms ---------- @@ -47,15 +59,3 @@ Histograms dpnp.bincount dpnp.histogram_bin_edges dpnp.digitize - - -Correlations ------------- - -.. 
autosummary:: - :toctree: generated/ - :nosignatures: - - dpnp.corrcoef - dpnp.cov - dpnp.correlate diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py index 8769b503003b..0cf37845534d 100644 --- a/dpnp/dpnp_iface.py +++ b/dpnp/dpnp_iface.py @@ -78,6 +78,8 @@ from dpnp.dpnp_iface_bitwise import __all__ as __all__bitwise from dpnp.dpnp_iface_counting import * from dpnp.dpnp_iface_counting import __all__ as __all__counting +from dpnp.dpnp_iface_histograms import * +from dpnp.dpnp_iface_histograms import __all__ as __all__histograms from dpnp.dpnp_iface_indexing import * from dpnp.dpnp_iface_indexing import __all__ as __all__indexing from dpnp.dpnp_iface_libmath import * @@ -111,6 +113,7 @@ __all__ += __all__arraycreation __all__ += __all__bitwise __all__ += __all__counting +__all__ += __all__histograms __all__ += __all__indexing __all__ += __all__libmath __all__ += __all__linearalgebra diff --git a/dpnp/dpnp_iface_histograms.py b/dpnp/dpnp_iface_histograms.py new file mode 100644 index 000000000000..ce8c0ff90a55 --- /dev/null +++ b/dpnp/dpnp_iface_histograms.py @@ -0,0 +1,337 @@ +# -*- coding: utf-8 -*- +# ***************************************************************************** +# Copyright (c) 2024, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. +# ***************************************************************************** + +""" +Interface of histogram-related DPNP functions + +Notes +----- +This module is a face or public interface file for the library +it contains: + - Interface functions + - documentation for the functions + - The functions parameters check + +""" + +import operator +import warnings + +import dpctl.utils as dpu +import numpy + +import dpnp + +__all__ = [ + "histogram", +] + +# range is a keyword argument to many functions, so save the builtin so they can +# use it. 
_range = range


def _ravel_check_a_and_weights(a, weights):
    """
    Validate the `a` and `weights` arrays and flatten both of them.

    Parameters
    ----------
    a : {dpnp.ndarray, usm_ndarray}
        Input data, validated with :obj:`dpnp.check_supported_arrays_type`.
        A boolean array is converted to ``numpy.uint8`` (with a
        ``RuntimeWarning``) so that its values can be subtracted.
    weights : {None, dpnp.ndarray, usm_ndarray}
        Optional weights. When given, they are validated the same way as `a`.

    Returns
    -------
    a : array
        Flattened (and possibly converted) input data.
    weights : {None, array}
        Flattened weights, or ``None`` when no weights were passed.
    usm_type : str
        USM type of `a`, coerced with the USM type of `weights` when
        weights were passed.

    Raises
    ------
    ValueError
        If `a` and `weights` have no common execution queue or have
        different shapes.

    """

    # ensure that `a` array has supported type
    dpnp.check_supported_arrays_type(a)
    usm_type = a.usm_type

    # ensure that the array is a "subtractable" dtype
    if a.dtype == dpnp.bool:
        warnings.warn(
            f"Converting input from {a.dtype} to {numpy.uint8} "
            "for compatibility.",
            RuntimeWarning,
            stacklevel=3,
        )
        a = a.astype(numpy.uint8)

    if weights is not None:
        # check that `weights` array has supported type
        dpnp.check_supported_arrays_type(weights)
        usm_type = dpu.get_coerced_usm_type([usm_type, weights.usm_type])

        # check that arrays have the same allocation queue
        if dpu.get_execution_queue([a.sycl_queue, weights.sycl_queue]) is None:
            raise ValueError(
                "a and weights must be allocated on the same SYCL queue"
            )

        if weights.shape != a.shape:
            raise ValueError("weights should have the same shape as a.")
        weights = weights.ravel()

    a = a.ravel()
    return a, weights, usm_type


def _get_outer_edges(a, range):
    """
    Determine the outer bin edges from either the data or the `range`
    argument.

    Parameters
    ----------
    a : array
        Input data; only inspected when `range` is ``None``.
    range : {None, 2-tuple}
        Explicit ``(first_edge, last_edge)`` pair.

    Returns
    -------
    first_edge, last_edge
        The outer edges. When `range` is ``None`` and `a` is empty the pair
        ``(0, 1)`` is used. When both edges are equal, the interval is
        widened by ``0.5`` on each side.

    Raises
    ------
    ValueError
        If the first edge is greater than the last one, or if either edge
        (supplied or autodetected) is not finite.

    """

    if range is not None:
        first_edge, last_edge = range
        if first_edge > last_edge:
            raise ValueError("max must be larger than min in range parameter.")

        if not (numpy.isfinite(first_edge) and numpy.isfinite(last_edge)):
            raise ValueError(
                f"supplied range of [{first_edge}, {last_edge}] is not finite"
            )

    elif a.size == 0:
        # handle empty arrays. Can't determine range, so use 0-1.
        first_edge, last_edge = 0, 1

    else:
        first_edge, last_edge = a.min(), a.max()
        if not (dpnp.isfinite(first_edge) and dpnp.isfinite(last_edge)):
            raise ValueError(
                f"autodetected range of [{first_edge}, {last_edge}] "
                "is not finite"
            )

    # expand empty range to avoid divide by zero
    if first_edge == last_edge:
        first_edge = first_edge - 0.5
        last_edge = last_edge + 0.5

    return first_edge, last_edge


def _get_bin_edges(a, bins, range, usm_type):
    """
    Compute the bin edges used internally by `histogram`.

    Parameters
    ----------
    a : array
        Flattened input data; its SYCL queue is used for any array created
        here.
    bins : {int, array, sequence of scalars}
        Number of equal-width bins, or explicit 1-D bin edges.
    range : {None, 2-tuple}
        Outer edges, used only when `bins` is an integer.
    usm_type : str
        USM type for any array created here.

    Returns
    -------
    bin_edges : array
        The bin edges. Explicit edges that already are a supported array
        are returned as passed.
    uniform_bins : {None, tuple}
        ``(first_edge, last_edge, n_equal_bins)`` when `bins` is an integer,
        otherwise ``None``.

    Raises
    ------
    NotImplementedError
        If `bins` is a string.
    TypeError
        If `bins` is a scalar that is not an integer.
    ValueError
        If an integer `bins` is less than ``1``; if array `bins` is not 1-D,
        is not monotonically increasing, or has no common execution queue
        with `a`.

    """

    # parse the overloaded bins argument
    n_equal_bins = None
    bin_edges = None
    sycl_queue = a.sycl_queue

    if isinstance(bins, str):
        # TODO: implement support of string bins
        raise NotImplementedError("only integer and array bins are implemented")

    if numpy.ndim(bins) == 0:
        try:
            n_equal_bins = operator.index(bins)
        except TypeError as e:
            raise TypeError("`bins` must be an integer or an array") from e
        if n_equal_bins < 1:
            raise ValueError("`bins` must be positive, when an integer")

        first_edge, last_edge = _get_outer_edges(a, range)

    elif numpy.ndim(bins) == 1:
        if dpnp.is_supported_array_type(bins):
            if dpu.get_execution_queue([sycl_queue, bins.sycl_queue]) is None:
                raise ValueError(
                    "a and bins must be allocated on the same SYCL queue"
                )

            bin_edges = bins
        else:
            bin_edges = dpnp.asarray(
                bins, sycl_queue=sycl_queue, usm_type=usm_type
            )

        if dpnp.any(bin_edges[:-1] > bin_edges[1:]):
            raise ValueError(
                "`bins` must increase monotonically, when an array"
            )

    else:
        raise ValueError("`bins` must be 1d, when an array")

    if n_equal_bins is None:
        return bin_edges, None

    # numpy's gh-10322 means that type resolution rules are dependent on
    # array shapes. To avoid this causing problems, we pick a type now and
    # stick with it throughout.
    bin_type = dpnp.result_type(first_edge, last_edge, a)
    if dpnp.issubdtype(bin_type, dpnp.integer):
        bin_type = dpnp.result_type(
            bin_type, dpnp.default_float_type(sycl_queue=sycl_queue), a
        )

    # bin edges must be computed
    bin_edges = dpnp.linspace(
        first_edge,
        last_edge,
        n_equal_bins + 1,
        endpoint=True,
        dtype=bin_type,
        sycl_queue=sycl_queue,
        usm_type=usm_type,
    )
    return bin_edges, (first_edge, last_edge, n_equal_bins)


def _search_sorted_inclusive(a, v):
    """
    Like :obj:`dpnp.searchsorted`, but where the last item in `v` is placed
    on the right.

    In the context of a histogram, this makes the last bin edge inclusive.

    Parameters
    ----------
    a : array
        Sorted data to search in.
    v : array
        Bin edges to locate in `a`.

    Returns
    -------
    out : array
        Insertion indices: "left" positions for all edges but the last one,
        followed by the "right" position of the last edge.

    """

    return dpnp.concatenate(
        (a.searchsorted(v[:-1], "left"), a.searchsorted(v[-1:], "right"))
    )


def histogram(a, bins=10, range=None, density=None, weights=None):
    """
    Compute the histogram of a dataset.

    For full documentation refer to :obj:`numpy.histogram`.

    Parameters
    ----------
    a : {dpnp.ndarray, usm_ndarray}
        Input data. The histogram is computed over the flattened array.
    bins : {int, dpnp.ndarray, usm_ndarray, sequence of scalars}, optional
        If `bins` is an int, it defines the number of equal-width bins in the
        given range.
        If `bins` is a sequence, it defines a monotonically increasing array
        of bin edges, including the rightmost edge, allowing for non-uniform
        bin widths.
        Default: ``10``.
    range : {None, 2-tuple of float}, optional
        The lower and upper range of the bins. If not provided, range is simply
        ``(a.min(), a.max())``. Values outside the range are ignored. The first
        element of the range must be less than or equal to the second.
        `range` is only used when `bins` is an int.
        Default: ``None``.
    density : {None, bool}, optional
        If ``False`` or ``None``, the result will contain the number of
        samples in each bin.
        If ``True``, the result is the value of the probability *density*
        function at the bin, normalized such that the *integral* over the range
        is ``1``. Note that the sum of the histogram values will not be equal
        to ``1`` unless bins of unity width are chosen; it is not a probability
        *mass* function.
        Default: ``None``.
    weights : {None, dpnp.ndarray, usm_ndarray}, optional
        An array of weights, of the same shape as `a`. Each value in `a` only
        contributes its associated weight towards the bin count (instead of 1).
        If `density` is ``True``, the weights are normalized, so that the
        integral of the density over the range remains ``1``.
        Please note that the ``dtype`` of `weights` will also become the
        ``dtype`` of the returned accumulator (`hist`), so it must be large
        enough to hold accumulated values as well.
        Default: ``None``.

    Returns
    -------
    hist : {dpnp.ndarray}
        The values of the histogram. See `density` and `weights` for a
        description of the possible semantics. If `weights` are given,
        ``hist.dtype`` will be taken from `weights`.
    bin_edges : {dpnp.ndarray}
        Return the bin edges ``(length(hist) + 1)``. The edges have
        a floating data type when `bins` is an int; explicit edges passed as
        a supported array are returned unchanged.

    Raises
    ------
    NotImplementedError
        If `bins` is a string: selecting a bin width estimator by name is
        not implemented yet.
    TypeError
        If `bins` is a scalar that is not an integer.
    ValueError
        If `bins` or `range` are invalid, if `weights` does not have the same
        shape as `a`, or if the input arrays are not allocated on
        the same SYCL queue.

    See Also
    --------
    :obj:`dpnp.histogramdd` : Compute the multidimensional histogram.
    :obj:`dpnp.bincount` : Count number of occurrences of each value in array
                           of non-negative integers.
    :obj:`dpnp.searchsorted` : Find indices where elements should be inserted
                               to maintain order.
    :obj:`dpnp.digitize` : Return the indices of the bins to which each value
                           in input array belongs.
    :obj:`dpnp.histogram_bin_edges` : Return only the edges of the bins used
                                      by the :obj:`dpnp.histogram` function.

    Examples
    --------
    >>> import dpnp as np
    >>> np.histogram(np.array([1, 2, 1]), bins=[0, 1, 2, 3])
    (array([0, 2, 1]), array([0, 1, 2, 3]))
    >>> np.histogram(np.arange(4), bins=np.arange(5), density=True)
    (array([0.25, 0.25, 0.25, 0.25]), array([0, 1, 2, 3, 4]))
    >>> np.histogram(np.array([[1, 2, 1], [1, 0, 1]]), bins=[0, 1, 2, 3])
    (array([1, 4, 1]), array([0, 1, 2, 3]))

    >>> a = np.arange(5)
    >>> hist, bin_edges = np.histogram(a, density=True)
    >>> hist
    array([0.5, 0. , 0.5, 0. , 0. , 0.5, 0. , 0.5, 0. , 0.5])
    >>> hist.sum()
    array(2.5)
    >>> np.sum(hist * np.diff(bin_edges))
    array(1.)

    """

    a, weights, usm_type = _ravel_check_a_and_weights(a, weights)

    # The second returned item describes equal-width bins; it will only be
    # needed once a fast path for uniform bins exists.
    bin_edges, _ = _get_bin_edges(a, bins, range, usm_type)

    # TODO: implement a fast path (bincount-based) for equal bins and
    # simple weights; until then always compute via cumulative histogram.
    if weights is None:
        sa = dpnp.sort(a)
        cum_n = _search_sorted_inclusive(sa, bin_edges)
    else:
        # the accumulator takes the data type of the weights
        ntype = weights.dtype
        zero = dpnp.zeros(
            1, dtype=ntype, sycl_queue=a.sycl_queue, usm_type=usm_type
        )
        sorting_index = dpnp.argsort(a)
        sa = a[sorting_index]
        sw = weights[sorting_index]
        # leading zero so that index 0 maps to "nothing accumulated yet"
        cw = dpnp.concatenate((zero, sw.cumsum(dtype=ntype)))
        bin_index = _search_sorted_inclusive(sa, bin_edges)
        cum_n = cw[bin_index]

    n = dpnp.diff(cum_n)

    if density:
        # pick the floating type supported by the device of `a`, not by
        # the default device
        db = dpnp.diff(bin_edges).astype(
            dpnp.default_float_type(sycl_queue=a.sycl_queue)
        )
        return n / db / n.sum(), bin_edges

    return n, bin_edges
b/dpnp/dpnp_iface_statistics.py index db323456bf40..cd17857b6068 100644 --- a/dpnp/dpnp_iface_statistics.py +++ b/dpnp/dpnp_iface_statistics.py @@ -65,7 +65,6 @@ "bincount", "correlate", "cov", - "histogram", "max", "mean", "median", @@ -452,43 +451,6 @@ def cov( ) -def histogram(a, bins=10, range=None, density=None, weights=None): - """ - Compute the histogram of a dataset. - - For full documentation refer to :obj:`numpy.histogram`. - - Examples - -------- - >>> import dpnp - >>> dpnp.histogram([1, 2, 1], bins=[0, 1, 2, 3]) - (array([0, 2, 1]), array([0, 1, 2, 3])) - >>> dpnp.histogram(dpnp.arange(4), bins=dpnp.arange(5), density=True) - (array([0.25, 0.25, 0.25, 0.25]), array([0, 1, 2, 3, 4])) - >>> dpnp.histogram([[1, 2, 1], [1, 0, 1]], bins=[0,1,2,3]) - (array([1, 4, 1]), array([0, 1, 2, 3])) - >>> a = dpnp.arange(5) - >>> hist, bin_edges = dpnp.histogram(a, density=True) - >>> hist - array([0.5, 0. , 0.5, 0. , 0. , 0.5, 0. , 0.5, 0. , 0.5]) - >>> hist.sum() - 2.4999999999999996 - >>> res = dpnp.sum(hist * dpnp.diff(bin_edges)) - >>> print(res) - 1.0 - - """ - - return call_origin( - numpy.histogram, - a=a, - bins=bins, - range=range, - density=density, - weights=weights, - ) - - def max(a, axis=None, out=None, keepdims=False, initial=None, where=True): """ Return the maximum of an array or maximum along an axis. 
diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl index 899267fcc85d..9ef4091f7482 100644 --- a/tests/skipped_tests.tbl +++ b/tests/skipped_tests.tbl @@ -1,5 +1,3 @@ -tests/test_histograms.py::TestHistogram::test_density - tests/test_random.py::TestDistributionsMultivariateNormal::test_moments tests/test_random.py::TestDistributionsMultivariateNormal::test_output_shape_check tests/test_random.py::TestDistributionsMultivariateNormal::test_seed @@ -733,40 +731,6 @@ tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_6_ tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_7_{bins=[1.5, 2.5, 4.0, 6.0], increasing=False, right=True, shape=(10,)}::test_digitize tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_8_{bins=[1.5, 2.5, 4.0, 6.0], increasing=False, right=True, shape=(6, 3, 3)}::test_digitize tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_9_{bins=[1.5, 2.5, 4.0, 6.0], increasing=False, right=False, shape=()}::test_digitize -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount_duplicated_value -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount_invalid_weight_length -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount_negative -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount_too_deep -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount_too_small -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount_too_small_minlength -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount_with_minlength -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount_with_weight 
-tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount_zero -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_array_bins -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_bins_not_ordered -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_complex_weights -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_complex_weights_uneven_bins -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_density -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_empty -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_float_weights -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_float_weights_dtype -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_int_bins -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_int_weights -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_int_weights_dtype -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_int_weights_nonuniform_bins -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_int_weights_normalized -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_invalid_range -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_invalid_range2 -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_list_bins -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_numpy_bins 
-tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_range_lower_outliers -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_range_upper_outliers -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_range_with_density -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_range_with_weights_and_density -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_same_value -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_weights_mismatch tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_defaults[linear] tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_defaults[lower] diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index 98d67bf44da7..4688c1787fc1 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -81,8 +81,6 @@ tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsPois tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsPoisson_param_2_{lam_shape=(3, 2), shape=(4, 3, 2)}::test_poisson tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsPoisson_param_3_{lam_shape=(3, 2), shape=(3, 2)}::test_poisson -tests/test_histograms.py::TestHistogram::test_density - tests/third_party/cupy/core_tests/test_ndarray_conversion.py::TestNdarrayToBytes_param_0_{shape=()}::test_item tests/third_party/cupy/core_tests/test_ndarray_conversion.py::TestNdarrayToBytes_param_1_{shape=(1,)}::test_item tests/third_party/cupy/core_tests/test_ndarray_conversion.py::TestNdarrayToBytes_param_2_{shape=(2, 3)}::test_item @@ -757,40 +755,6 @@ tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_6_ 
tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_7_{bins=[1.5, 2.5, 4.0, 6.0], increasing=False, right=True, shape=(10,)}::test_digitize tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_8_{bins=[1.5, 2.5, 4.0, 6.0], increasing=False, right=True, shape=(6, 3, 3)}::test_digitize tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_9_{bins=[1.5, 2.5, 4.0, 6.0], increasing=False, right=False, shape=()}::test_digitize -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount_duplicated_value -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount_invalid_weight_length -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount_negative -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount_too_deep -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount_too_small -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount_too_small_minlength -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount_with_minlength -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount_with_weight -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount_zero -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_array_bins -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_bins_not_ordered -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_complex_weights 
-tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_complex_weights_uneven_bins -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_density -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_empty -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_float_weights -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_float_weights_dtype -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_int_bins -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_int_weights -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_int_weights_dtype -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_int_weights_nonuniform_bins -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_int_weights_normalized -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_invalid_range -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_invalid_range2 -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_list_bins -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_numpy_bins -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_range_lower_outliers -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_range_upper_outliers -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_range_with_density -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_range_with_weights_and_density 
-tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_same_value -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_weights_mismatch tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_defaults[linear] tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_defaults[lower] diff --git a/tests/test_arraycreation.py b/tests/test_arraycreation.py index 59c8c514a04f..727aa8238c89 100644 --- a/tests/test_arraycreation.py +++ b/tests/test_arraycreation.py @@ -7,7 +7,6 @@ import pytest from numpy.testing import ( assert_allclose, - assert_almost_equal, assert_array_equal, ) diff --git a/tests/test_histogram.py b/tests/test_histogram.py new file mode 100644 index 000000000000..ebe7b4715dff --- /dev/null +++ b/tests/test_histogram.py @@ -0,0 +1,391 @@ +import dpctl +import numpy +import pytest +from numpy.testing import ( + assert_, + assert_allclose, + assert_array_equal, + assert_raises, + assert_raises_regex, + suppress_warnings, +) + +import dpnp + +from .helper import ( + assert_dtype_allclose, + get_all_dtypes, + has_support_aspect64, +) + + +class TestHistogram: + @pytest.mark.usefixtures("suppress_complex_warning") + @pytest.mark.parametrize( + "dtype", get_all_dtypes(no_none=True, no_bool=True) + ) + def test_rand_data(self, dtype): + n = 100 + v = numpy.random.rand(n).astype(dtype=dtype) + iv = dpnp.array(v, dtype=dtype) + + expected_hist, _ = numpy.histogram(v) + result_hist, _ = dpnp.histogram(iv) + assert_array_equal(result_hist, expected_hist) + + @pytest.mark.usefixtures("suppress_complex_warning") + @pytest.mark.parametrize( + "dtype", get_all_dtypes(no_none=True, no_bool=True) + ) + def test_linspace_data(self, dtype): + v = numpy.linspace(0, 10, 100, dtype=dtype) + iv = dpnp.array(v) + + expected_hist, _ = numpy.histogram(v) + result_hist, _ = dpnp.histogram(iv) + assert_array_equal(result_hist, expected_hist) + + 
@pytest.mark.parametrize( + "data, bins_data", + [ + pytest.param([1, 2, 3, 4], [1, 2], id="1d-1d"), + pytest.param([1, 2], 1, id="1d-0d"), + ], + ) + def test_one_bin(self, data, bins_data): + a = numpy.array(data) + bins = numpy.array(bins_data) + + ia = dpnp.array(a) + ibins = dpnp.array(bins) + expected_hist, expected_edges = numpy.histogram(a, bins=bins) + result_hist, result_edges = dpnp.histogram(ia, bins=ibins) + assert_array_equal(result_hist, expected_hist) + assert_array_equal(result_edges, expected_edges) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_zero_bin(self, xp): + a = xp.array([1, 2]) + assert_raises(ValueError, xp.histogram, a, bins=0) + + @pytest.mark.usefixtures("suppress_complex_warning") + @pytest.mark.parametrize( + "dtype", get_all_dtypes(no_none=True, no_bool=True) + ) + def test_density(self, dtype): + n = 100 + v = numpy.random.rand(n).astype(dtype=dtype) + iv = dpnp.array(v, dtype=dtype) + + expected_hist, expected_edges = numpy.histogram(v, density=True) + result_hist, result_edges = dpnp.histogram(iv, density=True) + + if numpy.issubdtype(dtype, numpy.inexact): + tol = numpy.finfo(dtype).resolution + assert_allclose(result_hist, expected_hist, rtol=tol, atol=tol) + assert_allclose(result_edges, expected_edges, rtol=tol, atol=tol) + else: + assert_dtype_allclose(result_hist, expected_hist) + assert_dtype_allclose(result_edges, expected_edges) + + @pytest.mark.parametrize("density", [True, False]) + def test_bin_density(self, density): + bins = [0, 1, 3, 6, 10] + v = numpy.arange(10) + iv = dpnp.array(v) + + expected_hist, expected_edges = numpy.histogram( + v, bins, density=density + ) + result_hist, result_edges = dpnp.histogram(iv, bins, density=density) + assert_allclose(result_hist, expected_hist) + assert_array_equal(result_edges, expected_edges) + + @pytest.mark.parametrize( + "bins", [[0, 1, 3, 6, numpy.inf], [0.5, 1.5, numpy.inf]] + ) + def test_bin_inf(self, bins): + v = numpy.arange(10) + iv = dpnp.array(v) + 
+ expected_hist, expected_edges = numpy.histogram(v, bins, density=True) + result_hist, result_edges = dpnp.histogram(iv, bins, density=True) + assert_allclose(result_hist, expected_hist) + assert_array_equal(result_edges, expected_edges) + + @pytest.mark.parametrize("range", [[0, 9], [1, 10]], ids=["lower", "upper"]) + def test_outliers(self, range): + a = numpy.arange(10) + 0.5 + ia = dpnp.array(a) + + expected_hist, expected_edges = numpy.histogram(a, range=range) + result_hist, result_edges = dpnp.histogram(ia, range=range) + assert_allclose(result_hist, expected_hist) + assert_allclose(result_edges, expected_edges) + + def test_outliers_normalization_weights(self): + range = [1, 9] + a = numpy.arange(10) + 0.5 + ia = dpnp.array(a) + + # Normalization + expected_hist, expected_edges = numpy.histogram(a, range, density=True) + result_hist, result_edges = dpnp.histogram(ia, range, density=True) + assert_allclose(result_hist, expected_hist) + assert_allclose(result_edges, expected_edges) + + w = numpy.arange(10) + 0.5 + iw = dpnp.array(w) + + # Weights + expected_hist, expected_edges = numpy.histogram( + a, range, weights=w, density=True + ) + result_hist, result_edges = dpnp.histogram( + ia, range, weights=iw, density=True + ) + assert_allclose(result_hist, expected_hist) + assert_allclose(result_edges, expected_edges) + + expected_hist, expected_edges = numpy.histogram( + a, bins=8, range=range, weights=w, density=True + ) + result_hist, result_edges = dpnp.histogram( + ia, bins=8, range=range, weights=iw, density=True + ) + assert_allclose(result_hist, expected_hist) + assert_allclose(result_edges, expected_edges) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_bool_conversion(self, xp): + a = xp.array([1, 1, 0], dtype=numpy.uint8) + int_hist, int_edges = xp.histogram(a) + + with suppress_warnings() as sup: + rec = sup.record(RuntimeWarning, "Converting input from .*") + + v = xp.array([True, True, False]) + hist, edges = xp.histogram(v) + + # A 
warning should be issued + assert len(rec) == 1 + assert_array_equal(hist, int_hist) + assert_array_equal(edges, int_edges) + + @pytest.mark.parametrize("density", [True, False]) + def test_weights(self, density): + v = numpy.random.rand(100) + w = numpy.ones(100) * 5 + + iv = dpnp.array(v) + iw = dpnp.array(w) + + expected_hist, expected_edges = numpy.histogram( + v, weights=w, density=density + ) + result_hist, result_edges = dpnp.histogram( + iv, weights=iw, density=density + ) + assert_dtype_allclose(result_hist, expected_hist) + assert_dtype_allclose(result_edges, expected_edges) + + def test_integer_weights(self): + v = numpy.array([1, 2, 2, 4]) + w = numpy.array([4, 3, 2, 1]) + + iv = dpnp.array(v) + iw = dpnp.array(w) + + expected_hist, expected_edges = numpy.histogram(v, bins=4, weights=w) + result_hist, result_edges = dpnp.histogram(iv, bins=4, weights=iw) + assert_array_equal(result_hist, expected_hist) + assert_array_equal(result_edges, expected_edges) + + def test_weights_non_uniform_bin_widths(self): + bins = [0, 1, 3, 6, 10] + v = numpy.arange(9) + w = numpy.array([2, 1, 1, 1, 1, 1, 1, 1, 1]) + + iv = dpnp.array(v) + iw = dpnp.array(w) + + expected_hist, expected_edges = numpy.histogram( + v, bins, weights=w, density=True + ) + result_hist, result_edges = dpnp.histogram( + iv, bins, weights=iw, density=True + ) + assert_allclose(result_hist, expected_hist) + assert_array_equal(result_edges, expected_edges) + + def test_weights_complex_dtype(self): + bins = [0, 2, 3] + v = numpy.array([1.3, 2.5, 2.3]) + w = numpy.array([1, -1, 2]) + 1j * numpy.array([2, 1, 2]) + + iv = dpnp.array(v) + iw = dpnp.array(w) + + # with custom bins + expected_hist, expected_edges = numpy.histogram(v, bins, weights=w) + result_hist, result_edges = dpnp.histogram(iv, bins, weights=iw) + assert_array_equal(result_hist, expected_hist) + assert_array_equal(result_edges, expected_edges) + + # with even bins + expected_hist, expected_edges = numpy.histogram( + v, bins=2, range=[1, 
3], weights=w + ) + result_hist, result_edges = dpnp.histogram( + iv, bins=2, range=[1, 3], weights=iw + ) + assert_array_equal(result_hist, expected_hist) + assert_array_equal(result_edges, expected_edges) + + def test_no_side_effects(self): + v = dpnp.array([1.3, 2.5, 2.3]) + copy_v = v.copy() + + # ensure that values passed to ``histogram`` are left unchanged + _, _ = dpnp.histogram(v, range=[-10, 10], bins=100) + assert (v == copy_v).all() + + def test_empty(self): + expected_hist, expected_edges = numpy.histogram( + numpy.array([]), bins=([0, 1]) + ) + result_hist, result_edges = dpnp.histogram( + dpnp.array([]), bins=([0, 1]) + ) + assert_array_equal(result_hist, expected_hist) + assert_array_equal(result_edges, expected_edges) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_error_binnum_type(self, xp): + vals = xp.linspace(0.0, 1.0, num=100) + + # `bins` must be an integer, a string, or an array + _, _ = xp.histogram(vals, 5) + assert_raises(TypeError, xp.histogram, vals, 2.4) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_finite_range(self, xp): + vals = xp.linspace(0.0, 1.0, num=100) + + # normal ranges should be fine + _, _ = xp.histogram(vals, range=[0.25, 0.75]) + assert_raises(ValueError, xp.histogram, vals, range=[xp.nan, 0.75]) + assert_raises(ValueError, xp.histogram, vals, range=[0.25, xp.inf]) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_invalid_range(self, xp): + # start of range must be < end of range + vals = xp.linspace(0.0, 1.0, num=100) + with assert_raises_regex(ValueError, "max must be larger than"): + xp.histogram(vals, range=[0.1, 0.01]) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + @pytest.mark.parametrize("inf_val", [-numpy.inf, numpy.inf]) + def test_infinite_edge(self, xp, inf_val): + v = xp.array([0.5, 1.5, inf_val]) + min, max = v.min(), v.max() + + # both first and last ranges must be finite + with assert_raises_regex( + ValueError, rf"autodetected range of \[{min}, {max}\] is 
not finite" + ): + xp.histogram(v) + + def test_bin_edge_cases(self): + v = dpnp.array([337, 404, 739, 806, 1007, 1811, 2012]) + + hist, edges = dpnp.histogram(v, bins=8296, range=(2, 2280)) + mask = hist > 0 + left_edges = edges[:-1][mask] + right_edges = edges[1:][mask] + + # floating-point computations correctly place edge cases + for x, left, right in zip(v, left_edges, right_edges): + assert_(x >= left) + assert_(x < right) + + @pytest.mark.skipif(not has_support_aspect64(), reason="fp64 required") + def test_last_bin_inclusive_range(self): + v = numpy.array([0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 4.0, 5.0]) + iv = dpnp.array(v) + + expected_hist, expected_edges = numpy.histogram( + v, bins=30, range=(-0.5, 5) + ) + result_hist, result_edges = dpnp.histogram(iv, bins=30, range=(-0.5, 5)) + assert_allclose(result_hist, expected_hist) + assert_allclose(result_edges, expected_edges) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_bin_array_dims(self, xp): + # gracefully handle bins object > 1 dimension + vals = xp.linspace(0.0, 1.0, num=100) + bins = xp.array([[0, 0.5], [0.6, 1.0]]) + with assert_raises_regex(ValueError, "must be 1d"): + xp.histogram(vals, bins=bins) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_unsigned_monotonicity_check(self, xp): + # bins must increase monotonically when bins contain unsigned values + arr = xp.array([2]) + bins = xp.array([1, 3, 1], dtype="uint64") + with assert_raises(ValueError): + xp.histogram(arr, bins=bins) + + def test_nan_values(self): + one_nan = numpy.array([0, 1, numpy.nan]) + all_nan = numpy.array([numpy.nan, numpy.nan]) + + ione_nan = dpnp.array(one_nan) + iall_nan = dpnp.array(all_nan) + + # NaN is not counted + expected_hist, expected_edges = numpy.histogram(one_nan, bins=[0, 1]) + result_hist, result_edges = dpnp.histogram(ione_nan, bins=[0, 1]) + assert_array_equal(result_hist, expected_hist) + assert_array_equal(result_edges, expected_edges) + + # NaN is not counted + expected_hist, 
expected_edges = numpy.histogram(all_nan, bins=[0, 1]) + result_hist, result_edges = dpnp.histogram(iall_nan, bins=[0, 1]) + assert_array_equal(result_hist, expected_hist) + assert_array_equal(result_edges, expected_edges) + + @pytest.mark.parametrize( + "dtype", + [numpy.byte, numpy.short, numpy.intc, numpy.int_, numpy.longlong], + ) + def test_signed_overflow_bounds(self, dtype): + exponent = 8 * numpy.dtype(dtype).itemsize - 1 + v = numpy.array([-(2**exponent) + 4, 2**exponent - 4], dtype=dtype) + iv = dpnp.array(v) + + expected_hist, expected_edges = numpy.histogram(v, bins=2) + result_hist, result_edges = dpnp.histogram(iv, bins=2) + assert_array_equal(result_hist, expected_hist) + assert_allclose(result_edges, expected_edges) + + def test_string_bins_not_implemented(self): + v = dpnp.arange(5) + + # numpy supports string bins, but dpnp does not + _, _ = numpy.histogram(v.asnumpy(), bins="auto") + with assert_raises(NotImplementedError): + dpnp.histogram(v, bins="auto") + + def test_bins_another_sycl_queue(self): + v = dpnp.arange(7, 12, sycl_queue=dpctl.SyclQueue()) + bins = dpnp.arange(4, sycl_queue=dpctl.SyclQueue()) + with assert_raises(ValueError): + dpnp.histogram(v, bins=bins) + + def test_weights_another_sycl_queue(self): + v = dpnp.arange(5, sycl_queue=dpctl.SyclQueue()) + w = dpnp.arange(7, 12, sycl_queue=dpctl.SyclQueue()) + with assert_raises(ValueError): + dpnp.histogram(v, weights=w) diff --git a/tests/test_histograms.py b/tests/test_histograms.py deleted file mode 100644 index a283c5547cc6..000000000000 --- a/tests/test_histograms.py +++ /dev/null @@ -1,89 +0,0 @@ -import numpy -import pytest - -import dpnp - -from .helper import has_support_aspect64 - - -class TestHistogram: - def setup(self): - pass - - def teardown(self): - pass - - @pytest.mark.usefixtures("allow_fall_back_on_numpy") - def test_simple(self): - n = 100 - v = dpnp.random.rand(n) - a, _ = dpnp.histogram(v) - # check if the sum of the bins equals the number of samples - 
numpy.testing.assert_equal(dpnp.sum(a, axis=0), n) - # check that the bin counts are evenly spaced when the data is from - # a linear function - a, _ = dpnp.histogram( - numpy.linspace( - 0, - 10, - 100, - dtype="float64" if has_support_aspect64() else "float32", - ) - ) - numpy.testing.assert_array_equal(a, 10) - - @pytest.mark.usefixtures("allow_fall_back_on_numpy") - def test_one_bin(self): - # Ticket 632 - hist, edges = dpnp.histogram([1, 2, 3, 4], [1, 2]) - numpy.testing.assert_array_equal( - hist, - [ - 2, - ], - ) - numpy.testing.assert_array_equal(edges, [1, 2]) - numpy.testing.assert_raises(ValueError, dpnp.histogram, [1, 2], bins=0) - h, e = dpnp.histogram([1, 2], bins=1) - numpy.testing.assert_equal(h, dpnp.array([2])) - numpy.testing.assert_allclose(e, dpnp.array([1.0, 2.0])) - - def test_density(self): - # Check that the integral of the density equals 1. - n = 100 - v = dpnp.random.rand(n) - a, b = dpnp.histogram(v, density=True) - area = dpnp.sum(a * dpnp.diff(b)[0])[0] - numpy.testing.assert_almost_equal(area, 1) - - # Check with non-constant bin widths - v = dpnp.arange(10) - bins = [0, 1, 3, 6, 10] - a, b = dpnp.histogram(v, bins, density=True) - numpy.testing.assert_array_equal(a, 0.1) - numpy.testing.assert_equal(dpnp.sum(a * dpnp.diff(b))[0], 1) - - # Test that passing False works too - a, b = dpnp.histogram(v, bins, density=False) - numpy.testing.assert_array_equal(a, [1, 2, 3, 4]) - - # Variable bin widths are especially useful to deal with - # infinities. - v = dpnp.arange(10) - bins = [0, 1, 3, 6, numpy.inf] - a, b = dpnp.histogram(v, bins, density=True) - numpy.testing.assert_array_equal(a, [0.1, 0.1, 0.1, 0.0]) - - # Taken from a bug report from N. Becker on the numpy-discussion - # mailing list Aug. 6, 2010. 
- counts, _ = dpnp.histogram( - [1, 2, 3, 4], [0.5, 1.5, numpy.inf], density=True - ) - numpy.testing.assert_equal(counts, [0.25, 0]) - - @pytest.mark.usefixtures("allow_fall_back_on_numpy") - def test_arr_weights_mismatch(self): - a = dpnp.arange(10) + 0.5 - w = dpnp.arange(11) + 0.5 - with numpy.testing.assert_raises_regex(ValueError, "same shape as"): - h, b = dpnp.histogram(a, range=[1, 9], weights=w, density=True) diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index 9b0f136d0e89..dfa2296dbac1 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -2054,3 +2054,27 @@ def test_lstsq(m, n, nrhs, device): for param_dp in result_dp: assert_sycl_queue_equal(param_dp.sycl_queue, a_dp.sycl_queue) assert_sycl_queue_equal(param_dp.sycl_queue, b_dp.sycl_queue) + + +@pytest.mark.parametrize("weights", [None, numpy.arange(7, 12)]) +@pytest.mark.parametrize( + "device", + valid_devices, + ids=[device.filter_string for device in valid_devices], +) +def test_histogram(weights, device): + v = numpy.arange(5) + w = weights + + iv = dpnp.array(v, device=device) + iw = None if weights is None else dpnp.array(w, sycl_queue=iv.sycl_queue) + + expected_hist, expected_edges = numpy.histogram(v, weights=w) + result_hist, result_edges = dpnp.histogram(iv, weights=iw) + assert_array_equal(result_hist, expected_hist) + assert_dtype_allclose(result_edges, expected_edges) + + hist_queue = result_hist.sycl_queue + edges_queue = result_edges.sycl_queue + assert_sycl_queue_equal(hist_queue, iv.sycl_queue) + assert_sycl_queue_equal(edges_queue, iv.sycl_queue) diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py index 0bdd969df6c3..7923e60455b1 100644 --- a/tests/test_usm_type.py +++ b/tests/test_usm_type.py @@ -1195,3 +1195,16 @@ def test_lstsq(m, n, nrhs, usm_type_a, usm_type_b): assert param.usm_type == du.get_coerced_usm_type( [usm_type_a, usm_type_b] ) + + +@pytest.mark.parametrize("usm_type_v", list_of_usm_types, ids=list_of_usm_types) 
+@pytest.mark.parametrize("usm_type_w", list_of_usm_types, ids=list_of_usm_types) +def test_histogram(usm_type_v, usm_type_w): + v = dp.arange(5, usm_type=usm_type_v) + w = dp.arange(7, 12, usm_type=usm_type_w) + + hist, edges = dp.histogram(v, weights=w) + assert v.usm_type == usm_type_v + assert w.usm_type == usm_type_w + assert hist.usm_type == du.get_coerced_usm_type([usm_type_v, usm_type_w]) + assert edges.usm_type == du.get_coerced_usm_type([usm_type_v, usm_type_w]) diff --git a/tests/third_party/cupy/statistics_tests/test_histogram.py b/tests/third_party/cupy/statistics_tests/test_histogram.py index 1fc8dcb5387c..bb1dd8e07ce5 100644 --- a/tests/third_party/cupy/statistics_tests/test_histogram.py +++ b/tests/third_party/cupy/statistics_tests/test_histogram.py @@ -5,11 +5,9 @@ import pytest import dpnp as cupy +from tests.helper import has_support_aspect64 from tests.third_party.cupy import testing -# from cupy.core import _accelerator - - # Note that numpy.bincount does not support uint64 on 64-bit environment # as it casts an input array to intp. # And it does not support uint32, int64 and uint64 on 32-bit environment. 
@@ -40,36 +38,36 @@ def for_signed_dtypes_bincount(name="dtype"): def for_all_dtypes_combination_bincount(names): - return testing._loops.for_dtypes_combination(_all_types, names=names) + return testing.for_dtypes_combination(_all_types, names=names) class TestHistogram(unittest.TestCase): @testing.for_all_dtypes(no_bool=True, no_complex=True) - @testing.numpy_cupy_array_equal() + @testing.numpy_cupy_allclose(atol=1e-6, type_check=has_support_aspect64()) def test_histogram(self, xp, dtype): x = testing.shaped_arange((10,), xp, dtype) y, bin_edges = xp.histogram(x) return y, bin_edges @testing.for_all_dtypes(no_bool=True, no_complex=True) - @testing.numpy_cupy_array_equal() + @testing.numpy_cupy_allclose(atol=1e-7, type_check=has_support_aspect64()) def test_histogram_same_value(self, xp, dtype): - x = xp.zeros(10, dtype) + x = xp.zeros(10, dtype=dtype) y, bin_edges = xp.histogram(x, 3) return y, bin_edges @testing.for_all_dtypes(no_bool=True, no_complex=True) - @testing.numpy_cupy_array_equal() + @testing.numpy_cupy_allclose(atol=1e-6, type_check=has_support_aspect64()) def test_histogram_density(self, xp, dtype): x = testing.shaped_arange((10,), xp, dtype) y, bin_edges = xp.histogram(x, density=True) # check normalization area = xp.sum(y * xp.diff(bin_edges)) - testing.assert_allclose(area, 1) + testing.assert_allclose(area, 1, rtol=1e-6) return y, bin_edges @testing.for_float_dtypes() - @testing.numpy_cupy_array_equal() + @testing.numpy_cupy_allclose() def test_histogram_range_lower_outliers(self, xp, dtype): # Check that lower outliers are not tallied a = xp.arange(10, dtype=dtype) + 0.5 @@ -78,7 +76,7 @@ def test_histogram_range_lower_outliers(self, xp, dtype): return h, b @testing.for_float_dtypes() - @testing.numpy_cupy_array_equal() + @testing.numpy_cupy_allclose() def test_histogram_range_upper_outliers(self, xp, dtype): # Check that upper outliers are not tallied a = xp.arange(10, dtype=dtype) + 0.5 @@ -87,7 +85,7 @@ def 
test_histogram_range_upper_outliers(self, xp, dtype): return h, b @testing.for_float_dtypes() - @testing.numpy_cupy_allclose() + @testing.numpy_cupy_allclose(atol=1e-6, type_check=has_support_aspect64()) def test_histogram_range_with_density(self, xp, dtype): a = xp.arange(10, dtype=dtype) + 0.5 h, b = xp.histogram(a, range=[1, 9], density=True) @@ -96,7 +94,7 @@ def test_histogram_range_with_density(self, xp, dtype): return h @testing.for_float_dtypes() - @testing.numpy_cupy_allclose() + @testing.numpy_cupy_allclose(atol=1e-6, type_check=False) def test_histogram_range_with_weights_and_density(self, xp, dtype): a = xp.arange(10, dtype=dtype) + 0.5 w = xp.arange(10, dtype=dtype) + 0.5 @@ -128,7 +126,7 @@ def test_histogram_weights_mismatch(self, dtype): def test_histogram_int_weights_dtype(self, xp, dtype): # Check the type of the returned histogram a = xp.arange(10, dtype=dtype) - h, b = xp.histogram(a, weights=xp.ones(10, int)) + h, b = xp.histogram(a, weights=xp.ones(10, dtype=int)) assert xp.issubdtype(h.dtype, xp.integer) return h @@ -137,11 +135,10 @@ def test_histogram_int_weights_dtype(self, xp, dtype): def test_histogram_float_weights_dtype(self, xp, dtype): # Check the type of the returned histogram a = xp.arange(10, dtype=dtype) - h, b = xp.histogram(a, weights=xp.ones(10, float)) + h, b = xp.histogram(a, weights=xp.ones(10, dtype=xp.float32)) assert xp.issubdtype(h.dtype, xp.floating) return h - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_histogram_weights_basic(self): v = cupy.random.rand(100) w = cupy.ones(100) * 5 @@ -163,7 +160,7 @@ def test_histogram_float_weights(self, xp, dtype): return wb @testing.for_int_dtypes(no_bool=True) - @testing.numpy_cupy_array_equal(type_check=False) + @testing.numpy_cupy_array_equal(type_check=has_support_aspect64()) def test_histogram_int_weights(self, xp, dtype): # Check with integer weights v = xp.asarray([1, 2, 2, 4], dtype=dtype) @@ -173,7 +170,7 @@ def test_histogram_int_weights(self, xp, 
dtype): return wa, wb @testing.for_int_dtypes(no_bool=True) - @testing.numpy_cupy_allclose() + @testing.numpy_cupy_allclose(type_check=has_support_aspect64()) def test_histogram_int_weights_normalized(self, xp, dtype): v = xp.asarray([1, 2, 2, 4], dtype=dtype) w = xp.asarray([4, 3, 2, 1], dtype=dtype) @@ -184,7 +181,7 @@ def test_histogram_int_weights_normalized(self, xp, dtype): return wb @testing.for_int_dtypes(no_bool=True) - @testing.numpy_cupy_array_equal() + @testing.numpy_cupy_allclose(type_check=has_support_aspect64()) def test_histogram_int_weights_nonuniform_bins(self, xp, dtype): # Check weights with non-uniform bin widths a, b = xp.histogram( @@ -197,7 +194,7 @@ def test_histogram_int_weights_nonuniform_bins(self, xp, dtype): return a, b @testing.for_complex_dtypes() - @testing.numpy_cupy_array_equal(type_check=False) + @testing.numpy_cupy_allclose(type_check=has_support_aspect64()) def test_histogram_complex_weights(self, xp, dtype): values = xp.asarray([1.3, 2.5, 2.3]) weights = xp.asarray([1, -1, 2]) + 1j * xp.asarray([2, 1, 2]) @@ -206,7 +203,7 @@ def test_histogram_complex_weights(self, xp, dtype): return a, b @testing.for_complex_dtypes() - @testing.numpy_cupy_array_equal(type_check=False) + @testing.numpy_cupy_array_equal() def test_histogram_complex_weights_uneven_bins(self, xp, dtype): values = xp.asarray([1.3, 2.5, 2.3]) weights = xp.asarray([1, -1, 2]) + 1j * xp.asarray([2, 1, 2]) @@ -215,14 +212,14 @@ def test_histogram_complex_weights_uneven_bins(self, xp, dtype): return a, b @testing.for_all_dtypes(no_bool=True, no_complex=True) - @testing.numpy_cupy_array_equal() + @testing.numpy_cupy_allclose(type_check=has_support_aspect64()) def test_histogram_empty(self, xp, dtype): x = xp.array([], dtype) y, bin_edges = xp.histogram(x) return y, bin_edges @testing.for_all_dtypes(no_bool=True, no_complex=True) - @testing.numpy_cupy_array_equal() + @testing.numpy_cupy_array_equal(type_check=has_support_aspect64()) def test_histogram_int_bins(self, xp, 
dtype): x = testing.shaped_arange((10,), xp, dtype) y, bin_edges = xp.histogram(x, 4) @@ -261,18 +258,21 @@ def test_histogram_bins_not_ordered(self, dtype): with pytest.raises(ValueError): xp.histogram(x, bins) + @pytest.mark.skip("bincount() is not implemented yet") @for_all_dtypes_bincount() @testing.numpy_cupy_allclose(accept_error=TypeError) def test_bincount(self, xp, dtype): x = testing.shaped_arange((3,), xp, dtype) return xp.bincount(x) + @pytest.mark.skip("bincount() is not implemented yet") @for_all_dtypes_bincount() @testing.numpy_cupy_allclose(accept_error=TypeError) def test_bincount_duplicated_value(self, xp, dtype): x = xp.array([1, 2, 2, 1, 2, 4], dtype) return xp.bincount(x) + @pytest.mark.skip("bincount() is not implemented yet") @for_all_dtypes_combination_bincount(names=["x_type", "w_type"]) @testing.numpy_cupy_allclose(accept_error=TypeError) def test_bincount_with_weight(self, xp, x_type, w_type): @@ -280,12 +280,14 @@ def test_bincount_with_weight(self, xp, x_type, w_type): w = testing.shaped_arange((3,), xp, w_type) return xp.bincount(x, weights=w) + @pytest.mark.skip("bincount() is not implemented yet") @for_all_dtypes_bincount() @testing.numpy_cupy_allclose(accept_error=TypeError) def test_bincount_with_minlength(self, xp, dtype): x = testing.shaped_arange((3,), xp, dtype) return xp.bincount(x, minlength=5) + @pytest.mark.skip("bincount() is not implemented yet") @for_all_dtypes_combination_bincount(names=["x_type", "w_type"]) def test_bincount_invalid_weight_length(self, x_type, w_type): for xp in (numpy, cupy): @@ -296,6 +298,7 @@ def test_bincount_invalid_weight_length(self, x_type, w_type): with pytest.raises((ValueError, TypeError)): xp.bincount(x, weights=w) + @pytest.mark.skip("bincount() is not implemented yet") @for_signed_dtypes_bincount() def test_bincount_negative(self, dtype): for xp in (numpy, cupy): @@ -303,6 +306,7 @@ def test_bincount_negative(self, dtype): with pytest.raises(ValueError): xp.bincount(x) + 
@pytest.mark.skip("bincount() is not implemented yet") @for_all_dtypes_bincount() def test_bincount_too_deep(self, dtype): for xp in (numpy, cupy): @@ -310,6 +314,7 @@ def test_bincount_too_deep(self, dtype): with pytest.raises(ValueError): xp.bincount(x) + @pytest.mark.skip("bincount() is not implemented yet") @for_all_dtypes_bincount() def test_bincount_too_small(self, dtype): for xp in (numpy, cupy): @@ -317,12 +322,14 @@ def test_bincount_too_small(self, dtype): with pytest.raises(ValueError): xp.bincount(x) + @pytest.mark.skip("bincount() is not implemented yet") @for_all_dtypes_bincount() @testing.numpy_cupy_allclose(accept_error=TypeError) def test_bincount_zero(self, xp, dtype): x = testing.shaped_arange((3,), xp, dtype) return xp.bincount(x, minlength=0) + @pytest.mark.skip("bincount() is not implemented yet") @for_all_dtypes_bincount() def test_bincount_too_small_minlength(self, dtype): for xp in (numpy, cupy): @@ -333,42 +340,12 @@ def test_bincount_too_small_minlength(self, dtype): xp.bincount(x, minlength=-1) -# This class compares CUB results against NumPy's - -# @unittest.skipUnless(cupy.cuda.cub.available, 'The CUB routine is not enabled') -# class TestCubHistogram(unittest.TestCase): - -# def setUp(self): -# self.old_accelerators = _accelerator.get_routine_accelerators() -# _accelerator.set_routine_accelerators(['cub']) - -# def tearDown(self): -# _accelerator.set_routine_accelerators(self.old_accelerators) - -# @testing.for_all_dtypes(no_bool=True, no_complex=True) -# @testing.numpy_cupy_array_equal() -# def test_histogram(self, xp, dtype): -# x = testing.shaped_arange((10,), xp, dtype) - -# if xp is numpy: -# return xp.histogram(x) - -# # xp is cupy, first ensure we really use CUB -# cub_func = 'cupy._statistics.histogram.cub.device_histogram' -# with testing.AssertFunctionIsCalled(cub_func): -# xp.histogram(x) -# # ...then perform the actual computation -# return xp.histogram(x) - -# @testing.for_all_dtypes(no_bool=True, no_complex=True) -# 
@testing.numpy_cupy_array_equal() -# def test_histogram_range_float(self, xp, dtype): -# a = testing.shaped_arange((10,), xp, dtype) -# h, b = xp.histogram(a, testing.shaped_arange((10,), xp, numpy.float64)) -# assert int(h.sum()) == 10 -# return h, b +# TODO(leofang): we temporarily remove CUB histogram support for now, +# see cupy/cupy#7698. When it's ready, revert the commit that checked +# in this comment to restore the support. +@pytest.mark.skip("digitize() is not implemented yet") @testing.parameterize( *testing.product( { @@ -386,7 +363,7 @@ def test_bincount_too_small_minlength(self, dtype): } ) ) -class TestDigitize(unittest.TestCase): +class TestDigitize: @testing.for_all_dtypes(no_bool=True, no_complex=True) @testing.numpy_cupy_array_equal() def test_digitize(self, xp, dtype): @@ -399,6 +376,7 @@ def test_digitize(self, xp, dtype): return (y,) +@pytest.mark.skip("digitize() is not implemented yet") @testing.parameterize({"right": True}, {"right": False}) class TestDigitizeNanInf(unittest.TestCase): @testing.numpy_cupy_array_equal() @@ -469,11 +447,12 @@ def test_searchsorted_minf(self, xp): return (y,) +@pytest.mark.skip("digitize() is not implemented yet") class TestDigitizeInvalid(unittest.TestCase): def test_digitize_complex(self): for xp in (numpy, cupy): - x = testing.shaped_arange((14,), xp, xp.complex) - bins = xp.array([1.0, 3.0, 5.0, 8.0, 12.0], xp.complex) + x = testing.shaped_arange((14,), xp, complex) + bins = xp.array([1.0, 3.0, 5.0, 8.0, 12.0], complex) with pytest.raises(TypeError): xp.digitize(x, bins) @@ -483,3 +462,142 @@ def test_digitize_nd_bins(self): bins = xp.array([[1], [2]]) with pytest.raises(ValueError): xp.digitize(x, bins) + + +@pytest.mark.skip("histogramdd() is not implemented yet") +@testing.parameterize( + *testing.product( + { + "weights": [None, 1, 2], + "weights_dtype": [numpy.int32, numpy.float64], + "density": [True, False], + "bins": [ + 10, + (8, 16, 12), + (16, 8, 12), + (16, 12, 8), + (12, 8, 16), + 
"array_list", + ], + "range": [None, ((20, 50), (10, 100), (0, 40))], + } + ) +) +class TestHistogramdd: + @testing.for_all_dtypes(no_bool=True, no_complex=True) + @testing.numpy_cupy_allclose(atol=1e-7, rtol=1e-7) + def test_histogramdd(self, xp, dtype): + x = testing.shaped_random((100, 3), xp, dtype, scale=100) + if self.bins == "array_list": + bins = [xp.arange(0, 100, 4), xp.arange(0, 100, 10), xp.arange(25)] + else: + bins = self.bins + if self.weights is not None: + weights = xp.ones((x.shape[0],), dtype=self.weights_dtype) + else: + weights = None + y, bin_edges = xp.histogramdd( + x, + bins=bins, + range=self.range, + weights=weights, + density=self.density, + ) + return [ + y, + ] + [e for e in bin_edges] + + +@pytest.mark.skip("histogramdd() is not implemented yet") +class TestHistogramddErrors(unittest.TestCase): + def test_histogramdd_invalid_bins(self): + for xp in (numpy, cupy): + x = testing.shaped_random((16, 2), xp, scale=100) + bins = [ + xp.arange(0, 100, 10), + ] * 3 + with pytest.raises(ValueError): + y, bin_edges = xp.histogramdd(x, bins) + + def test_histogramdd_invalid_bins2(self): + for xp in (numpy, cupy): + x = testing.shaped_random((16, 2), xp, scale=100) + with pytest.raises(ValueError): + y, bin_edges = xp.histogramdd(x, bins=0) + + def test_histogramdd_invalid_bins3(self): + for xp in (numpy, cupy): + x = testing.shaped_random((16, 2), xp, scale=100) + bins = xp.arange(100) + bins[30] = 99 # non-ascending bins + with pytest.raises(ValueError): + y, bin_edges = xp.histogramdd(x, bins=bins) + + def test_histogramdd_invalid_bins4(self): + for xp in (numpy, cupy): + x = testing.shaped_random((16, 2), xp, scale=100) + bins = xp.arange(64).reshape((8, 8)) # too many dimensions + with pytest.raises(ValueError): + y, bin_edges = xp.histogramdd(x, bins=bins) + + def test_histogramdd_invalid_range(self): + for xp in (numpy, cupy): + x = testing.shaped_random((16, 2), xp, scale=100) + r = ((0, 100),) * 3 + with pytest.raises(ValueError): + y, 
bin_edges = xp.histogramdd(x, range=r) + + def test_histogramdd_disallow_arraylike_bins(self): + x = testing.shaped_random((16, 2), cupy, scale=100) + bins = [[0, 10, 20, 50, 90]] * 2 # too many dimensions + with pytest.raises(ValueError): + y, bin_edges = cupy.histogramdd(x, bins=bins) + + +@pytest.mark.skip("histogram2d() is not implemented yet") +@testing.parameterize( + *testing.product( + { + "weights": [None, 1, 2], + "weights_dtype": [numpy.int32, numpy.float64], + "density": [True, False], + "bins": [10, (8, 16), (16, 8), "array_list", "array"], + "range": [None, ((20, 50), (10, 100))], + } + ) +) +class TestHistogram2d: + @testing.for_all_dtypes(no_bool=True, no_complex=True) + @testing.numpy_cupy_allclose(atol=1e-7, rtol=1e-7) + def test_histogram2d(self, xp, dtype): + x = testing.shaped_random((100,), xp, dtype, scale=100) + y = testing.shaped_random((100,), xp, dtype, scale=100) + if self.bins == "array_list": + bins = [xp.arange(0, 100, 4), xp.arange(0, 100, 10)] + elif self.bins == "array": + bins = xp.arange(0, 100, 4) + else: + bins = self.bins + if self.weights is not None: + weights = xp.ones((x.shape[0],), dtype=self.weights_dtype) + else: + weights = None + y, edges0, edges1 = xp.histogram2d( + x, + y, + bins=bins, + range=self.range, + weights=weights, + density=self.density, + ) + return y, edges0, edges1 + + +@pytest.mark.skip("histogram2d() is not implemented yet") +class TestHistogram2dErrors(unittest.TestCase): + def test_histogram2d_disallow_arraylike_bins(self): + x = testing.shaped_random((16,), cupy, scale=100) + y = testing.shaped_random((16,), cupy, scale=100) + bins = [0, 10, 20, 50, 90] + with pytest.raises(ValueError): + y, bin_edges = cupy.histogram2d(x, y, bins=bins)