From 2b8585a6fb70c9f0d0c579fc5762ec8e74d317e0 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Mon, 11 Dec 2023 18:31:48 +0100 Subject: [PATCH 01/12] Implement dpnp.histogram --- .pre-commit-config.yaml | 2 +- dpnp/dpnp_iface.py | 3 + dpnp/dpnp_iface_histograms.py | 302 ++++++++++++++++++++++++++++++++++ dpnp/dpnp_iface_statistics.py | 38 ----- 4 files changed, 306 insertions(+), 39 deletions(-) create mode 100644 dpnp/dpnp_iface_histograms.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 690e131f6f26..9f873ffe1971 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -52,7 +52,7 @@ repos: rev: 24.4.0 hooks: - id: black - args: ["--check", "--diff", "--color"] + args: ["--color"] - repo: https://github.com/pycqa/isort rev: 5.13.2 hooks: diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py index 8769b503003b..0cf37845534d 100644 --- a/dpnp/dpnp_iface.py +++ b/dpnp/dpnp_iface.py @@ -78,6 +78,8 @@ from dpnp.dpnp_iface_bitwise import __all__ as __all__bitwise from dpnp.dpnp_iface_counting import * from dpnp.dpnp_iface_counting import __all__ as __all__counting +from dpnp.dpnp_iface_histograms import * +from dpnp.dpnp_iface_histograms import __all__ as __all__histograms from dpnp.dpnp_iface_indexing import * from dpnp.dpnp_iface_indexing import __all__ as __all__indexing from dpnp.dpnp_iface_libmath import * @@ -111,6 +113,7 @@ __all__ += __all__arraycreation __all__ += __all__bitwise __all__ += __all__counting +__all__ += __all__histograms __all__ += __all__indexing __all__ += __all__libmath __all__ += __all__linearalgebra diff --git a/dpnp/dpnp_iface_histograms.py b/dpnp/dpnp_iface_histograms.py new file mode 100644 index 000000000000..3c5276eb1a28 --- /dev/null +++ b/dpnp/dpnp_iface_histograms.py @@ -0,0 +1,302 @@ +import operator +import warnings + +import dpctl.utils as dpu +import numpy + +import dpnp + +__all__ = [ + "histogram", +] + +# range is a keyword argument to many functions, so save the builtin so they can +# use 
it. +_range = range + + +def _ravel_check_a_and_weights(a, weights): + """Check input `a` and `weights` arrays, and ravel both.""" + + # ensure that `a` array has supported type + dpnp.check_supported_arrays_type(a) + + # ensure that the array is a "subtractable" dtype + if a.dtype == dpnp.bool: + warnings.warn( + "Converting input from {} to {} for compatibility.".format( + a.dtype, dpnp.uint8 + ), + RuntimeWarning, + stacklevel=3, + ) + a = a.astype(dpnp.uint8) + + if weights is not None: + # check that `weights` array has supported type + dpnp.check_supported_arrays_type(weights) + + # check that arrays have the same allocation queue + if dpu.get_execution_queue([a.sycl_queue, weights.sycl_queue]) is None: + raise ValueError( + "a and weights must be allocated on the same SYCL queue" + ) + + if weights.shape != a.shape: + raise ValueError("weights should have the same shape as a.") + weights = weights.ravel() + a = a.ravel() + return a, weights + + +def _get_outer_edges(a, range): + """ + Determine the outer bin edges to use, from either the data or the range + argument. + + """ + + if range is not None: + first_edge, last_edge = range + if first_edge > last_edge: + raise ValueError("max must be larger than min in range parameter.") + + if not (numpy.isfinite(first_edge) and numpy.isfinite(last_edge)): + raise ValueError( + "supplied range of [{}, {}] is not finite".format( + first_edge, last_edge + ) + ) + + elif a.size == 0: + # handle empty arrays. Can't determine range, so use 0-1. 
+ first_edge, last_edge = 0, 1 + + else: + first_edge, last_edge = a.min(), a.max() + if not (dpnp.isfinite(first_edge) and dpnp.isfinite(last_edge)): + raise ValueError( + "autodetected range of [{}, {}] is not finite".format( + first_edge, last_edge + ) + ) + + # expand empty range to avoid divide by zero + if first_edge == last_edge: + first_edge = first_edge - 0.5 + last_edge = last_edge + 0.5 + + return first_edge, last_edge + + +def _get_bin_edges(a, bins, range): + """Computes the bins used internally by `histogram`.""" + + # parse the overloaded bins argument + n_equal_bins = None + bin_edges = None + + if isinstance(bins, str): + raise NotImplementedError("only integer and array bins are implemented") + + elif numpy.ndim(bins) == 0: + try: + n_equal_bins = operator.index(bins) + except TypeError as e: + raise TypeError("`bins` must be an integer or an array") from e + if n_equal_bins < 1: + raise ValueError("`bins` must be positive, when an integer") + + first_edge, last_edge = _get_outer_edges(a, range) + + elif numpy.ndim(bins) == 1: + if dpnp.is_supported_array_type(bins): + if dpu.get_execution_queue([a.sycl_queue, bins.sycl_queue]) is None: + raise ValueError( + "a and bins must be allocated on the same SYCL queue" + ) + + bin_edges = bins + else: + bin_edges = dpnp.asarray( + bins, sycl_queue=a.sycl_queue, usm_type=a.usm_type + ) + + if dpnp.any(bin_edges[:-1] > bin_edges[1:]): + raise ValueError( + "`bins` must increase monotonically, when an array" + ) + + else: + raise ValueError("`bins` must be 1d, when an array") + + if n_equal_bins is not None: + # numpy's gh-10322 means that type resolution rules are dependent on + # array shapes. To avoid this causing problems, we pick a type now and + # stick with it throughout. 
+ bin_type = dpnp.result_type(first_edge, last_edge, a) + if dpnp.issubdtype(bin_type, dpnp.integer): + bin_type = dpnp.result_type( + bin_type, dpnp.default_float_type(sycl_queue=a.sycl_queue), a + ) + + # bin edges must be computed + bin_edges = dpnp.linspace( + first_edge, + last_edge, + n_equal_bins + 1, + endpoint=True, + dtype=bin_type, + sycl_queue=a.sycl_queue, + usm_type=a.usm_type, + ) + return bin_edges, (first_edge, last_edge, n_equal_bins) + else: + return bin_edges, None + + +def _search_sorted_inclusive(a, v): + """ + Like :obj:`dpnp.searchsorted`, but where the last item in `v` is placed + on the right. + In the context of a histogram, this makes the last bin edge inclusive + + """ + + return dpnp.concatenate( + (a.searchsorted(v[:-1], "left"), a.searchsorted(v[-1:], "right")) + ) + + +def histogram(a, bins=10, range=None, density=None, weights=None): + """ + Compute the histogram of a dataset. + + For full documentation refer to :obj:`numpy.histogram`. + + Parameters + ---------- + a : {dpnp.ndarray, usm_ndarray} + Input data. The histogram is computed over the flattened array. + bins : {int, dpnp.ndarray, usm_ndarray, sequence of scalars}, optional + If `bins` is an int, it defines the number of equal-width bins in the + given range (``10``, by default). + If `bins` is a sequence, it defines a monotonically increasing array + of bin edges, including the rightmost edge, allowing for non-uniform + bin widths. + If `bins` is a string, it defines the method used to calculate the + optimal bin width, as defined by :obj:`dpnp.histogram_bin_edges`. + range : {2-tuple of float}, optional + The lower and upper range of the bins. If not provided, range is simply + ``(a.min(), a.max())``. Values outside the range are ignored. The first + element of the range must be less than or equal to the second. `range` + affects the automatic bin computation as well. 
While bin width is + computed to be optimal based on the actual data within `range`, the bin + count will fill the entire range including portions containing no data. + weights : {dpnp.ndarray, usm_ndarray}, optional + An array of weights, of the same shape as `a`. Each value in `a` only + contributes its associated weight towards the bin count (instead of 1). + If `density` is ``True``, the weights are normalized, so that the + integral of the density over the range remains ``1``. + Please note that the ``dtype`` of `weights` will also become the + ``dtype`` of the returned accumulator (`hist`), so it must be large + enough to hold accumulated values as well. + density : {bool}, optional + If ``False``, the result will contain the number of samples in each bin. + If ``True``, the result is the value of the probability *density* + function at the bin, normalized such that the *integral* over the range + is ``1``. Note that the sum of the histogram values will not be equal + to ``1`` unless bins of unity width are chosen; it is not a probability + *mass* function. + + Returns + ------- + hist : {dpnp.ndarray} + The values of the histogram. See `density` and `weights` for a + description of the possible semantics. If `weights` are given, + ``hist.dtype`` will be taken from `weights`. + bin_edges : {dpnp.ndarray of floating data type} + Return the bin edges ``(length(hist) + 1)``. 
+ + See Also + -------- + :obj:`dpnp.histogramdd` : TODO + :obj:`dpnp.bincount` : TODO + :obj:`dpnp.searchsorted` : TODO + :obj:`dpnp.digitize` : TODO + :obj:`dpnp.histogram_bin_edges` : TODO + + Examples + -------- + >>> import dpnp as np + >>> np.histogram(np.array([1, 2, 1]), bins=[0, 1, 2, 3]) + (array([0, 2, 1]), array([0, 1, 2, 3])) + >>> np.histogram(np.arange(4), bins=np.arange(5), density=True) + (array([0.25, 0.25, 0.25, 0.25]), array([0, 1, 2, 3, 4])) + >>> np.histogram(np.array([[1, 2, 1], [1, 0, 1]]), bins=[0, 1, 2, 3]) + (array([1, 4, 1]), array([0, 1, 2, 3])) + + >>> a = np.arange(5) + >>> hist, bin_edges = np.histogram(a, density=True) + >>> hist + array([0.5, 0. , 0.5, 0. , 0. , 0.5, 0. , 0.5, 0. , 0.5]) + >>> hist.sum() + array(2.5) + >>> np.sum(hist * np.diff(bin_edges)) + array(1.) + + """ + + a, weights = _ravel_check_a_and_weights(a, weights) + + bin_edges, uniform_bins = _get_bin_edges(a, bins, range) + + # Histogram is an integer or a float array depending on the weights. + if weights is None: + ntype = dpnp.dtype(dpnp.intp) + else: + ntype = weights.dtype + + # We set a block size, as this allows us to iterate over chunks when + # computing histograms, to minimize memory usage. 
+ BLOCK = 65536 + + # The fast path uses bincount, but that only works for certain types + # of weight + # simple_weights = ( + # weights is None or + # np.can_cast(weights.dtype, np.double) or + # np.can_cast(weights.dtype, complex) + # ) + # TODO: implement a fast path + simple_weights = False + + if uniform_bins is not None and simple_weights: + # TODO: implement fast algorithm for equal bins + pass + else: + # Compute via cumulative histogram + cum_n = dpnp.zeros_like(bin_edges, dtype=ntype) + if weights is None: + for i in _range(0, len(a), BLOCK): + sa = dpnp.sort(a[i : i + BLOCK]) + cum_n += _search_sorted_inclusive(sa, bin_edges) + else: + zero = dpnp.zeros(1, dtype=ntype) + for i in _range(0, len(a), BLOCK): + tmp_a = a[i : i + BLOCK] + tmp_w = weights[i : i + BLOCK] + sorting_index = dpnp.argsort(tmp_a) + sa = tmp_a[sorting_index] + sw = tmp_w[sorting_index] + cw = dpnp.concatenate((zero, sw.cumsum())) + bin_index = _search_sorted_inclusive(sa, bin_edges) + cum_n += cw[bin_index] + + n = dpnp.diff(cum_n) + + if density: + db = dpnp.diff(bin_edges) + return n / db / n.sum(), bin_edges + + return n, bin_edges diff --git a/dpnp/dpnp_iface_statistics.py b/dpnp/dpnp_iface_statistics.py index db323456bf40..cd17857b6068 100644 --- a/dpnp/dpnp_iface_statistics.py +++ b/dpnp/dpnp_iface_statistics.py @@ -65,7 +65,6 @@ "bincount", "correlate", "cov", - "histogram", "max", "mean", "median", @@ -452,43 +451,6 @@ def cov( ) -def histogram(a, bins=10, range=None, density=None, weights=None): - """ - Compute the histogram of a dataset. - - For full documentation refer to :obj:`numpy.histogram`. 
- - Examples - -------- - >>> import dpnp - >>> dpnp.histogram([1, 2, 1], bins=[0, 1, 2, 3]) - (array([0, 2, 1]), array([0, 1, 2, 3])) - >>> dpnp.histogram(dpnp.arange(4), bins=dpnp.arange(5), density=True) - (array([0.25, 0.25, 0.25, 0.25]), array([0, 1, 2, 3, 4])) - >>> dpnp.histogram([[1, 2, 1], [1, 0, 1]], bins=[0,1,2,3]) - (array([1, 4, 1]), array([0, 1, 2, 3])) - >>> a = dpnp.arange(5) - >>> hist, bin_edges = dpnp.histogram(a, density=True) - >>> hist - array([0.5, 0. , 0.5, 0. , 0. , 0.5, 0. , 0.5, 0. , 0.5]) - >>> hist.sum() - 2.4999999999999996 - >>> res = dpnp.sum(hist * dpnp.diff(bin_edges)) - >>> print(res) - 1.0 - - """ - - return call_origin( - numpy.histogram, - a=a, - bins=bins, - range=range, - density=density, - weights=weights, - ) - - def max(a, axis=None, out=None, keepdims=False, initial=None, where=True): """ Return the maximum of an array or maximum along an axis. From b8efe5d59f8c96ecbf8b31e72bd9ab5826c4487c Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Tue, 2 Apr 2024 17:35:11 +0200 Subject: [PATCH 02/12] Updated third party tests for histogram --- .pre-commit-config.yaml | 2 +- tests/skipped_tests.tbl | 36 --- tests/skipped_tests_gpu.tbl | 36 --- .../cupy/statistics_tests/test_histogram.py | 234 ++++++++++++++---- 4 files changed, 182 insertions(+), 126 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9f873ffe1971..690e131f6f26 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -52,7 +52,7 @@ repos: rev: 24.4.0 hooks: - id: black - args: ["--color"] + args: ["--check", "--diff", "--color"] - repo: https://github.com/pycqa/isort rev: 5.13.2 hooks: diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl index a16d620c0130..0cf233ac23f4 100644 --- a/tests/skipped_tests.tbl +++ b/tests/skipped_tests.tbl @@ -1,5 +1,3 @@ -tests/test_histograms.py::TestHistogram::test_density - tests/test_random.py::TestDistributionsMultivariateNormal::test_moments 
tests/test_random.py::TestDistributionsMultivariateNormal::test_output_shape_check tests/test_random.py::TestDistributionsMultivariateNormal::test_seed @@ -765,40 +763,6 @@ tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_6_ tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_7_{bins=[1.5, 2.5, 4.0, 6.0], increasing=False, right=True, shape=(10,)}::test_digitize tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_8_{bins=[1.5, 2.5, 4.0, 6.0], increasing=False, right=True, shape=(6, 3, 3)}::test_digitize tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_9_{bins=[1.5, 2.5, 4.0, 6.0], increasing=False, right=False, shape=()}::test_digitize -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount_duplicated_value -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount_invalid_weight_length -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount_negative -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount_too_deep -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount_too_small -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount_too_small_minlength -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount_with_minlength -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount_with_weight -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount_zero -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_array_bins 
-tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_bins_not_ordered -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_complex_weights -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_complex_weights_uneven_bins -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_density -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_empty -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_float_weights -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_float_weights_dtype -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_int_bins -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_int_weights -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_int_weights_dtype -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_int_weights_nonuniform_bins -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_int_weights_normalized -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_invalid_range -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_invalid_range2 -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_list_bins -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_numpy_bins -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_range_lower_outliers -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_range_upper_outliers 
-tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_range_with_density -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_range_with_weights_and_density -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_same_value -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_weights_mismatch tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_bad_q[linear] tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_bad_q[lower] diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index c748caf9d4b7..60c8a2d05811 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -98,8 +98,6 @@ tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsPois tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsPoisson_param_2_{lam_shape=(3, 2), shape=(4, 3, 2)}::test_poisson tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsPoisson_param_3_{lam_shape=(3, 2), shape=(3, 2)}::test_poisson -tests/test_histograms.py::TestHistogram::test_density - tests/third_party/cupy/core_tests/test_ndarray_conversion.py::TestNdarrayItemRaise_param_0_{shape=(0,)}::test_item tests/third_party/cupy/core_tests/test_ndarray_conversion.py::TestNdarrayItemRaise_param_1_{shape=(2, 3)}::test_item tests/third_party/cupy/core_tests/test_ndarray_conversion.py::TestNdarrayItemRaise_param_2_{shape=(1, 0, 1)}::test_item @@ -830,40 +828,6 @@ tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_6_ tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_7_{bins=[1.5, 2.5, 4.0, 6.0], increasing=False, right=True, shape=(10,)}::test_digitize tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_8_{bins=[1.5, 2.5, 4.0, 6.0], increasing=False, 
right=True, shape=(6, 3, 3)}::test_digitize tests/third_party/cupy/statistics_tests/test_histogram.py::TestDigitize_param_9_{bins=[1.5, 2.5, 4.0, 6.0], increasing=False, right=False, shape=()}::test_digitize -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount_duplicated_value -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount_invalid_weight_length -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount_negative -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount_too_deep -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount_too_small -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount_too_small_minlength -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount_with_minlength -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount_with_weight -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_bincount_zero -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_array_bins -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_bins_not_ordered -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_complex_weights -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_complex_weights_uneven_bins -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_density -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_empty 
-tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_float_weights -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_float_weights_dtype -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_int_bins -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_int_weights -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_int_weights_dtype -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_int_weights_nonuniform_bins -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_int_weights_normalized -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_invalid_range -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_invalid_range2 -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_list_bins -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_numpy_bins -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_range_lower_outliers -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_range_upper_outliers -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_range_with_density -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_range_with_weights_and_density -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_same_value -tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_weights_mismatch tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_bad_q[linear] 
tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_bad_q[lower] diff --git a/tests/third_party/cupy/statistics_tests/test_histogram.py b/tests/third_party/cupy/statistics_tests/test_histogram.py index 1fc8dcb5387c..ef530ddfa8ad 100644 --- a/tests/third_party/cupy/statistics_tests/test_histogram.py +++ b/tests/third_party/cupy/statistics_tests/test_histogram.py @@ -5,11 +5,9 @@ import pytest import dpnp as cupy +from tests.helper import has_support_aspect64 from tests.third_party.cupy import testing -# from cupy.core import _accelerator - - # Note that numpy.bincount does not support uint64 on 64-bit environment # as it casts an input array to intp. # And it does not support uint32, int64 and uint64 on 32-bit environment. @@ -40,36 +38,36 @@ def for_signed_dtypes_bincount(name="dtype"): def for_all_dtypes_combination_bincount(names): - return testing._loops.for_dtypes_combination(_all_types, names=names) + return testing.for_dtypes_combination(_all_types, names=names) class TestHistogram(unittest.TestCase): @testing.for_all_dtypes(no_bool=True, no_complex=True) - @testing.numpy_cupy_array_equal() + @testing.numpy_cupy_allclose(atol=1e-6, type_check=has_support_aspect64()) def test_histogram(self, xp, dtype): x = testing.shaped_arange((10,), xp, dtype) y, bin_edges = xp.histogram(x) return y, bin_edges @testing.for_all_dtypes(no_bool=True, no_complex=True) - @testing.numpy_cupy_array_equal() + @testing.numpy_cupy_allclose(atol=1e-7, type_check=has_support_aspect64()) def test_histogram_same_value(self, xp, dtype): - x = xp.zeros(10, dtype) + x = xp.zeros(10, dtype=dtype) y, bin_edges = xp.histogram(x, 3) return y, bin_edges @testing.for_all_dtypes(no_bool=True, no_complex=True) - @testing.numpy_cupy_array_equal() + @testing.numpy_cupy_allclose(atol=1e-6, type_check=has_support_aspect64()) def test_histogram_density(self, xp, dtype): x = testing.shaped_arange((10,), xp, dtype) y, bin_edges = xp.histogram(x, density=True) # check 
normalization area = xp.sum(y * xp.diff(bin_edges)) - testing.assert_allclose(area, 1) + testing.assert_allclose(area, 1, rtol=1e-6) return y, bin_edges @testing.for_float_dtypes() - @testing.numpy_cupy_array_equal() + @testing.numpy_cupy_allclose() def test_histogram_range_lower_outliers(self, xp, dtype): # Check that lower outliers are not tallied a = xp.arange(10, dtype=dtype) + 0.5 @@ -78,7 +76,7 @@ def test_histogram_range_lower_outliers(self, xp, dtype): return h, b @testing.for_float_dtypes() - @testing.numpy_cupy_array_equal() + @testing.numpy_cupy_allclose() def test_histogram_range_upper_outliers(self, xp, dtype): # Check that upper outliers are not tallied a = xp.arange(10, dtype=dtype) + 0.5 @@ -87,7 +85,7 @@ def test_histogram_range_upper_outliers(self, xp, dtype): return h, b @testing.for_float_dtypes() - @testing.numpy_cupy_allclose() + @testing.numpy_cupy_allclose(atol=1e-6, type_check=has_support_aspect64()) def test_histogram_range_with_density(self, xp, dtype): a = xp.arange(10, dtype=dtype) + 0.5 h, b = xp.histogram(a, range=[1, 9], density=True) @@ -95,6 +93,7 @@ def test_histogram_range_with_density(self, xp, dtype): testing.assert_allclose(float((h * xp.diff(b)).sum()), 1) return h + @pytest.mark.skip("cumsum() is not supported with complex dtypes") @testing.for_float_dtypes() @testing.numpy_cupy_allclose() def test_histogram_range_with_weights_and_density(self, xp, dtype): @@ -123,15 +122,17 @@ def test_histogram_weights_mismatch(self, dtype): with pytest.raises(ValueError): xp.histogram(a, range=[1, 9], weights=w, density=True) + @pytest.mark.skip("cumsum() is not supported with complex dtypes") @testing.for_all_dtypes(no_bool=True, no_complex=True) @testing.numpy_cupy_allclose() def test_histogram_int_weights_dtype(self, xp, dtype): # Check the type of the returned histogram a = xp.arange(10, dtype=dtype) - h, b = xp.histogram(a, weights=xp.ones(10, int)) + h, b = xp.histogram(a, weights=xp.ones(10, dtype=int)) assert 
xp.issubdtype(h.dtype, xp.integer) return h + @pytest.mark.skip("cumsum() is not supported with complex dtypes") @testing.for_all_dtypes(no_bool=True, no_complex=True) @testing.numpy_cupy_allclose() def test_histogram_float_weights_dtype(self, xp, dtype): @@ -141,7 +142,7 @@ def test_histogram_float_weights_dtype(self, xp, dtype): assert xp.issubdtype(h.dtype, xp.floating) return h - @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.skip("cumsum() is not supported with complex dtypes") def test_histogram_weights_basic(self): v = cupy.random.rand(100) w = cupy.ones(100) * 5 @@ -152,6 +153,7 @@ def test_histogram_weights_basic(self): testing.assert_array_almost_equal(a * 5, wa) testing.assert_array_almost_equal(na, nwa) + @pytest.mark.skip("cumsum() is not supported with complex dtypes") @testing.for_float_dtypes() @testing.numpy_cupy_allclose() def test_histogram_float_weights(self, xp, dtype): @@ -162,6 +164,7 @@ def test_histogram_float_weights(self, xp, dtype): testing.assert_array_almost_equal(wa, w) return wb + @pytest.mark.skip("cumsum() is not supported with complex dtypes") @testing.for_int_dtypes(no_bool=True) @testing.numpy_cupy_array_equal(type_check=False) def test_histogram_int_weights(self, xp, dtype): @@ -172,6 +175,7 @@ def test_histogram_int_weights(self, xp, dtype): testing.assert_array_equal(wa, [4, 5, 0, 1]) return wa, wb + @pytest.mark.skip("cumsum() is not supported with complex dtypes") @testing.for_int_dtypes(no_bool=True) @testing.numpy_cupy_allclose() def test_histogram_int_weights_normalized(self, xp, dtype): @@ -183,6 +187,7 @@ def test_histogram_int_weights_normalized(self, xp, dtype): ) return wb + @pytest.mark.skip("cumsum() is not supported with complex dtypes") @testing.for_int_dtypes(no_bool=True) @testing.numpy_cupy_array_equal() def test_histogram_int_weights_nonuniform_bins(self, xp, dtype): @@ -196,6 +201,7 @@ def test_histogram_int_weights_nonuniform_bins(self, xp, dtype): testing.assert_array_almost_equal(a, 
[0.2, 0.1, 0.1, 0.075]) return a, b + @pytest.mark.skip("cumsum() is not supported with complex dtypes") @testing.for_complex_dtypes() @testing.numpy_cupy_array_equal(type_check=False) def test_histogram_complex_weights(self, xp, dtype): @@ -205,6 +211,7 @@ def test_histogram_complex_weights(self, xp, dtype): a, b = xp.histogram(values, bins=2, weights=weights) return a, b + @pytest.mark.skip("cumsum() is not supported with complex dtypes") @testing.for_complex_dtypes() @testing.numpy_cupy_array_equal(type_check=False) def test_histogram_complex_weights_uneven_bins(self, xp, dtype): @@ -215,14 +222,14 @@ def test_histogram_complex_weights_uneven_bins(self, xp, dtype): return a, b @testing.for_all_dtypes(no_bool=True, no_complex=True) - @testing.numpy_cupy_array_equal() + @testing.numpy_cupy_allclose(type_check=has_support_aspect64()) def test_histogram_empty(self, xp, dtype): x = xp.array([], dtype) y, bin_edges = xp.histogram(x) return y, bin_edges @testing.for_all_dtypes(no_bool=True, no_complex=True) - @testing.numpy_cupy_array_equal() + @testing.numpy_cupy_array_equal(type_check=has_support_aspect64()) def test_histogram_int_bins(self, xp, dtype): x = testing.shaped_arange((10,), xp, dtype) y, bin_edges = xp.histogram(x, 4) @@ -261,18 +268,21 @@ def test_histogram_bins_not_ordered(self, dtype): with pytest.raises(ValueError): xp.histogram(x, bins) + @pytest.mark.skip("bincount() is not implemented yet") @for_all_dtypes_bincount() @testing.numpy_cupy_allclose(accept_error=TypeError) def test_bincount(self, xp, dtype): x = testing.shaped_arange((3,), xp, dtype) return xp.bincount(x) + @pytest.mark.skip("bincount() is not implemented yet") @for_all_dtypes_bincount() @testing.numpy_cupy_allclose(accept_error=TypeError) def test_bincount_duplicated_value(self, xp, dtype): x = xp.array([1, 2, 2, 1, 2, 4], dtype) return xp.bincount(x) + @pytest.mark.skip("bincount() is not implemented yet") @for_all_dtypes_combination_bincount(names=["x_type", "w_type"]) 
@testing.numpy_cupy_allclose(accept_error=TypeError) def test_bincount_with_weight(self, xp, x_type, w_type): @@ -280,12 +290,14 @@ def test_bincount_with_weight(self, xp, x_type, w_type): w = testing.shaped_arange((3,), xp, w_type) return xp.bincount(x, weights=w) + @pytest.mark.skip("bincount() is not implemented yet") @for_all_dtypes_bincount() @testing.numpy_cupy_allclose(accept_error=TypeError) def test_bincount_with_minlength(self, xp, dtype): x = testing.shaped_arange((3,), xp, dtype) return xp.bincount(x, minlength=5) + @pytest.mark.skip("bincount() is not implemented yet") @for_all_dtypes_combination_bincount(names=["x_type", "w_type"]) def test_bincount_invalid_weight_length(self, x_type, w_type): for xp in (numpy, cupy): @@ -296,6 +308,7 @@ def test_bincount_invalid_weight_length(self, x_type, w_type): with pytest.raises((ValueError, TypeError)): xp.bincount(x, weights=w) + @pytest.mark.skip("bincount() is not implemented yet") @for_signed_dtypes_bincount() def test_bincount_negative(self, dtype): for xp in (numpy, cupy): @@ -303,6 +316,7 @@ def test_bincount_negative(self, dtype): with pytest.raises(ValueError): xp.bincount(x) + @pytest.mark.skip("bincount() is not implemented yet") @for_all_dtypes_bincount() def test_bincount_too_deep(self, dtype): for xp in (numpy, cupy): @@ -310,6 +324,7 @@ def test_bincount_too_deep(self, dtype): with pytest.raises(ValueError): xp.bincount(x) + @pytest.mark.skip("bincount() is not implemented yet") @for_all_dtypes_bincount() def test_bincount_too_small(self, dtype): for xp in (numpy, cupy): @@ -317,12 +332,14 @@ def test_bincount_too_small(self, dtype): with pytest.raises(ValueError): xp.bincount(x) + @pytest.mark.skip("bincount() is not implemented yet") @for_all_dtypes_bincount() @testing.numpy_cupy_allclose(accept_error=TypeError) def test_bincount_zero(self, xp, dtype): x = testing.shaped_arange((3,), xp, dtype) return xp.bincount(x, minlength=0) + @pytest.mark.skip("bincount() is not implemented yet") 
@for_all_dtypes_bincount() def test_bincount_too_small_minlength(self, dtype): for xp in (numpy, cupy): @@ -333,42 +350,12 @@ def test_bincount_too_small_minlength(self, dtype): xp.bincount(x, minlength=-1) -# This class compares CUB results against NumPy's - -# @unittest.skipUnless(cupy.cuda.cub.available, 'The CUB routine is not enabled') -# class TestCubHistogram(unittest.TestCase): - -# def setUp(self): -# self.old_accelerators = _accelerator.get_routine_accelerators() -# _accelerator.set_routine_accelerators(['cub']) - -# def tearDown(self): -# _accelerator.set_routine_accelerators(self.old_accelerators) - -# @testing.for_all_dtypes(no_bool=True, no_complex=True) -# @testing.numpy_cupy_array_equal() -# def test_histogram(self, xp, dtype): -# x = testing.shaped_arange((10,), xp, dtype) - -# if xp is numpy: -# return xp.histogram(x) - -# # xp is cupy, first ensure we really use CUB -# cub_func = 'cupy._statistics.histogram.cub.device_histogram' -# with testing.AssertFunctionIsCalled(cub_func): -# xp.histogram(x) -# # ...then perform the actual computation -# return xp.histogram(x) - -# @testing.for_all_dtypes(no_bool=True, no_complex=True) -# @testing.numpy_cupy_array_equal() -# def test_histogram_range_float(self, xp, dtype): -# a = testing.shaped_arange((10,), xp, dtype) -# h, b = xp.histogram(a, testing.shaped_arange((10,), xp, numpy.float64)) -# assert int(h.sum()) == 10 -# return h, b +# TODO(leofang): we temporarily remove CUB histogram support for now, +# see cupy/cupy#7698. When it's ready, revert the commit that checked +# in this comment to restore the support. 
+@pytest.mark.skip("digitize() is not implemented yet") @testing.parameterize( *testing.product( { @@ -386,7 +373,7 @@ def test_bincount_too_small_minlength(self, dtype): } ) ) -class TestDigitize(unittest.TestCase): +class TestDigitize: @testing.for_all_dtypes(no_bool=True, no_complex=True) @testing.numpy_cupy_array_equal() def test_digitize(self, xp, dtype): @@ -399,6 +386,7 @@ def test_digitize(self, xp, dtype): return (y,) +@pytest.mark.skip("digitize() is not implemented yet") @testing.parameterize({"right": True}, {"right": False}) class TestDigitizeNanInf(unittest.TestCase): @testing.numpy_cupy_array_equal() @@ -469,11 +457,12 @@ def test_searchsorted_minf(self, xp): return (y,) +@pytest.mark.skip("digitize() is not implemented yet") class TestDigitizeInvalid(unittest.TestCase): def test_digitize_complex(self): for xp in (numpy, cupy): - x = testing.shaped_arange((14,), xp, xp.complex) - bins = xp.array([1.0, 3.0, 5.0, 8.0, 12.0], xp.complex) + x = testing.shaped_arange((14,), xp, complex) + bins = xp.array([1.0, 3.0, 5.0, 8.0, 12.0], complex) with pytest.raises(TypeError): xp.digitize(x, bins) @@ -483,3 +472,142 @@ def test_digitize_nd_bins(self): bins = xp.array([[1], [2]]) with pytest.raises(ValueError): xp.digitize(x, bins) + + +@pytest.mark.skip("histogramdd() is not implemented yet") +@testing.parameterize( + *testing.product( + { + "weights": [None, 1, 2], + "weights_dtype": [numpy.int32, numpy.float64], + "density": [True, False], + "bins": [ + 10, + (8, 16, 12), + (16, 8, 12), + (16, 12, 8), + (12, 8, 16), + "array_list", + ], + "range": [None, ((20, 50), (10, 100), (0, 40))], + } + ) +) +class TestHistogramdd: + @testing.for_all_dtypes(no_bool=True, no_complex=True) + @testing.numpy_cupy_allclose(atol=1e-7, rtol=1e-7) + def test_histogramdd(self, xp, dtype): + x = testing.shaped_random((100, 3), xp, dtype, scale=100) + if self.bins == "array_list": + bins = [xp.arange(0, 100, 4), xp.arange(0, 100, 10), xp.arange(25)] + else: + bins = self.bins + if 
self.weights is not None: + weights = xp.ones((x.shape[0],), dtype=self.weights_dtype) + else: + weights = None + y, bin_edges = xp.histogramdd( + x, + bins=bins, + range=self.range, + weights=weights, + density=self.density, + ) + return [ + y, + ] + [e for e in bin_edges] + + +@pytest.mark.skip("histogramdd() is not implemented yet") +class TestHistogramddErrors(unittest.TestCase): + def test_histogramdd_invalid_bins(self): + for xp in (numpy, cupy): + x = testing.shaped_random((16, 2), xp, scale=100) + bins = [ + xp.arange(0, 100, 10), + ] * 3 + with pytest.raises(ValueError): + y, bin_edges = xp.histogramdd(x, bins) + + def test_histogramdd_invalid_bins2(self): + for xp in (numpy, cupy): + x = testing.shaped_random((16, 2), xp, scale=100) + with pytest.raises(ValueError): + y, bin_edges = xp.histogramdd(x, bins=0) + + def test_histogramdd_invalid_bins3(self): + for xp in (numpy, cupy): + x = testing.shaped_random((16, 2), xp, scale=100) + bins = xp.arange(100) + bins[30] = 99 # non-ascending bins + with pytest.raises(ValueError): + y, bin_edges = xp.histogramdd(x, bins=bins) + + def test_histogramdd_invalid_bins4(self): + for xp in (numpy, cupy): + x = testing.shaped_random((16, 2), xp, scale=100) + bins = xp.arange(64).reshape((8, 8)) # too many dimensions + with pytest.raises(ValueError): + y, bin_edges = xp.histogramdd(x, bins=bins) + + def test_histogramdd_invalid_range(self): + for xp in (numpy, cupy): + x = testing.shaped_random((16, 2), xp, scale=100) + r = ((0, 100),) * 3 + with pytest.raises(ValueError): + y, bin_edges = xp.histogramdd(x, range=r) + + def test_histogramdd_disallow_arraylike_bins(self): + x = testing.shaped_random((16, 2), cupy, scale=100) + bins = [[0, 10, 20, 50, 90]] * 2 # too many dimensions + with pytest.raises(ValueError): + y, bin_edges = cupy.histogramdd(x, bins=bins) + + +@pytest.mark.skip("histogram2d() is not implemented yet") +@testing.parameterize( + *testing.product( + { + "weights": [None, 1, 2], + "weights_dtype": 
[numpy.int32, numpy.float64], + "density": [True, False], + "bins": [10, (8, 16), (16, 8), "array_list", "array"], + "range": [None, ((20, 50), (10, 100))], + } + ) +) +class TestHistogram2d: + @testing.for_all_dtypes(no_bool=True, no_complex=True) + @testing.numpy_cupy_allclose(atol=1e-7, rtol=1e-7) + def test_histogram2d(self, xp, dtype): + x = testing.shaped_random((100,), xp, dtype, scale=100) + y = testing.shaped_random((100,), xp, dtype, scale=100) + if self.bins == "array_list": + bins = [xp.arange(0, 100, 4), xp.arange(0, 100, 10)] + elif self.bins == "array": + bins = xp.arange(0, 100, 4) + else: + bins = self.bins + if self.weights is not None: + weights = xp.ones((x.shape[0],), dtype=self.weights_dtype) + else: + weights = None + y, edges0, edges1 = xp.histogram2d( + x, + y, + bins=bins, + range=self.range, + weights=weights, + density=self.density, + ) + return y, edges0, edges1 + + +@pytest.mark.skip("histogram2d() is not implemented yet") +class TestHistogram2dErrors(unittest.TestCase): + def test_histogram2d_disallow_arraylike_bins(self): + x = testing.shaped_random((16,), cupy, scale=100) + y = testing.shaped_random((16,), cupy, scale=100) + bins = [0, 10, 20, 50, 90] + with pytest.raises(ValueError): + y, bin_edges = cupy.histogram2d(x, y, bins=bins) From 5d6e98fb2aefd53d7f5d873af2eb7269781bf152 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 10 Apr 2024 13:48:21 +0200 Subject: [PATCH 03/12] Updated histogram tests due to new dpnp.cumsum() impl --- dpnp/dpnp_iface_histograms.py | 16 ++++++++----- .../cupy/statistics_tests/test_histogram.py | 24 ++++++------------- 2 files changed, 17 insertions(+), 23 deletions(-) diff --git a/dpnp/dpnp_iface_histograms.py b/dpnp/dpnp_iface_histograms.py index 3c5276eb1a28..6de09faa9f52 100644 --- a/dpnp/dpnp_iface_histograms.py +++ b/dpnp/dpnp_iface_histograms.py @@ -220,11 +220,15 @@ def histogram(a, bins=10, range=None, density=None, weights=None): See Also -------- - :obj:`dpnp.histogramdd` : TODO - 
:obj:`dpnp.bincount` : TODO - :obj:`dpnp.searchsorted` : TODO - :obj:`dpnp.digitize` : TODO - :obj:`dpnp.histogram_bin_edges` : TODO + :obj:`dpnp.histogramdd` : Compute the multidimensional histogram. + :obj:`dpnp.bincount` : Count number of occurrences of each value in array + of non-negative integers. + :obj:`dpnp.searchsorted` : Find indices where elements should be inserted + to maintain order. + :obj:`dpnp.digitize` : Return the indices of the bins to which each value + in input array belongs. + :obj:`dpnp.histogram_bin_edges` : Return only the edges of the bins used + by the :obj:`dpnp.histogram` function. Examples -------- @@ -289,7 +293,7 @@ def histogram(a, bins=10, range=None, density=None, weights=None): sorting_index = dpnp.argsort(tmp_a) sa = tmp_a[sorting_index] sw = tmp_w[sorting_index] - cw = dpnp.concatenate((zero, sw.cumsum())) + cw = dpnp.concatenate((zero, sw.cumsum(dtype=ntype))) bin_index = _search_sorted_inclusive(sa, bin_edges) cum_n += cw[bin_index] diff --git a/tests/third_party/cupy/statistics_tests/test_histogram.py b/tests/third_party/cupy/statistics_tests/test_histogram.py index ef530ddfa8ad..bb1dd8e07ce5 100644 --- a/tests/third_party/cupy/statistics_tests/test_histogram.py +++ b/tests/third_party/cupy/statistics_tests/test_histogram.py @@ -93,9 +93,8 @@ def test_histogram_range_with_density(self, xp, dtype): testing.assert_allclose(float((h * xp.diff(b)).sum()), 1) return h - @pytest.mark.skip("cumsum() is not supported with complex dtypes") @testing.for_float_dtypes() - @testing.numpy_cupy_allclose() + @testing.numpy_cupy_allclose(atol=1e-6, type_check=False) def test_histogram_range_with_weights_and_density(self, xp, dtype): a = xp.arange(10, dtype=dtype) + 0.5 w = xp.arange(10, dtype=dtype) + 0.5 @@ -122,7 +121,6 @@ def test_histogram_weights_mismatch(self, dtype): with pytest.raises(ValueError): xp.histogram(a, range=[1, 9], weights=w, density=True) - @pytest.mark.skip("cumsum() is not supported with complex dtypes")
@testing.for_all_dtypes(no_bool=True, no_complex=True) @testing.numpy_cupy_allclose() def test_histogram_int_weights_dtype(self, xp, dtype): @@ -132,17 +130,15 @@ def test_histogram_int_weights_dtype(self, xp, dtype): assert xp.issubdtype(h.dtype, xp.integer) return h - @pytest.mark.skip("cumsum() is not supported with complex dtypes") @testing.for_all_dtypes(no_bool=True, no_complex=True) @testing.numpy_cupy_allclose() def test_histogram_float_weights_dtype(self, xp, dtype): # Check the type of the returned histogram a = xp.arange(10, dtype=dtype) - h, b = xp.histogram(a, weights=xp.ones(10, float)) + h, b = xp.histogram(a, weights=xp.ones(10, dtype=xp.float32)) assert xp.issubdtype(h.dtype, xp.floating) return h - @pytest.mark.skip("cumsum() is not supported with complex dtypes") def test_histogram_weights_basic(self): v = cupy.random.rand(100) w = cupy.ones(100) * 5 @@ -153,7 +149,6 @@ def test_histogram_weights_basic(self): testing.assert_array_almost_equal(a * 5, wa) testing.assert_array_almost_equal(na, nwa) - @pytest.mark.skip("cumsum() is not supported with complex dtypes") @testing.for_float_dtypes() @testing.numpy_cupy_allclose() def test_histogram_float_weights(self, xp, dtype): @@ -164,9 +159,8 @@ def test_histogram_float_weights(self, xp, dtype): testing.assert_array_almost_equal(wa, w) return wb - @pytest.mark.skip("cumsum() is not supported with complex dtypes") @testing.for_int_dtypes(no_bool=True) - @testing.numpy_cupy_array_equal(type_check=False) + @testing.numpy_cupy_array_equal(type_check=has_support_aspect64()) def test_histogram_int_weights(self, xp, dtype): # Check with integer weights v = xp.asarray([1, 2, 2, 4], dtype=dtype) @@ -175,9 +169,8 @@ def test_histogram_int_weights(self, xp, dtype): testing.assert_array_equal(wa, [4, 5, 0, 1]) return wa, wb - @pytest.mark.skip("cumsum() is not supported with complex dtypes") @testing.for_int_dtypes(no_bool=True) - @testing.numpy_cupy_allclose() + 
@testing.numpy_cupy_allclose(type_check=has_support_aspect64()) def test_histogram_int_weights_normalized(self, xp, dtype): v = xp.asarray([1, 2, 2, 4], dtype=dtype) w = xp.asarray([4, 3, 2, 1], dtype=dtype) @@ -187,9 +180,8 @@ def test_histogram_int_weights_normalized(self, xp, dtype): ) return wb - @pytest.mark.skip("cumsum() is not supported with complex dtypes") @testing.for_int_dtypes(no_bool=True) - @testing.numpy_cupy_array_equal() + @testing.numpy_cupy_allclose(type_check=has_support_aspect64()) def test_histogram_int_weights_nonuniform_bins(self, xp, dtype): # Check weights with non-uniform bin widths a, b = xp.histogram( @@ -201,9 +193,8 @@ def test_histogram_int_weights_nonuniform_bins(self, xp, dtype): testing.assert_array_almost_equal(a, [0.2, 0.1, 0.1, 0.075]) return a, b - @pytest.mark.skip("cumsum() is not supported with complex dtypes") @testing.for_complex_dtypes() - @testing.numpy_cupy_array_equal(type_check=False) + @testing.numpy_cupy_allclose(type_check=has_support_aspect64()) def test_histogram_complex_weights(self, xp, dtype): values = xp.asarray([1.3, 2.5, 2.3]) weights = xp.asarray([1, -1, 2]) + 1j * xp.asarray([2, 1, 2]) @@ -211,9 +202,8 @@ def test_histogram_complex_weights(self, xp, dtype): a, b = xp.histogram(values, bins=2, weights=weights) return a, b - @pytest.mark.skip("cumsum() is not supported with complex dtypes") @testing.for_complex_dtypes() - @testing.numpy_cupy_array_equal(type_check=False) + @testing.numpy_cupy_array_equal() def test_histogram_complex_weights_uneven_bins(self, xp, dtype): values = xp.asarray([1.3, 2.5, 2.3]) weights = xp.asarray([1, -1, 2]) + 1j * xp.asarray([2, 1, 2]) From c24fe0bac9e71c152270cb0acc1d8a95f9cd9c8a Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 10 Apr 2024 14:00:26 +0200 Subject: [PATCH 04/12] Applied pre-commit checks --- dpnp/dpnp_iface_histograms.py | 72 ++++++++++++++++++++++++++--------- 1 file changed, 53 insertions(+), 19 deletions(-) diff --git a/dpnp/dpnp_iface_histograms.py 
b/dpnp/dpnp_iface_histograms.py index 6de09faa9f52..fe1c97e813f2 100644 --- a/dpnp/dpnp_iface_histograms.py +++ b/dpnp/dpnp_iface_histograms.py @@ -1,3 +1,42 @@ +# -*- coding: utf-8 -*- +# ***************************************************************************** +# Copyright (c) 2024, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. 
+# ***************************************************************************** + +""" +Interface of histogram-related DPNP functions + +Notes +----- +This module is a face or public interface file for the library +it contains: + - Interface functions + - documentation for the functions + - The functions parameters check + +""" + import operator import warnings @@ -24,13 +63,12 @@ def _ravel_check_a_and_weights(a, weights): # ensure that the array is a "subtractable" dtype if a.dtype == dpnp.bool: warnings.warn( - "Converting input from {} to {} for compatibility.".format( - a.dtype, dpnp.uint8 - ), + f"Converting input from {a.dtype} to {numpy.uint8} " + "for compatibility.", RuntimeWarning, stacklevel=3, ) - a = a.astype(dpnp.uint8) + a = a.astype(numpy.uint8) if weights is not None: # check that `weights` array has supported type @@ -63,9 +101,7 @@ def _get_outer_edges(a, range): if not (numpy.isfinite(first_edge) and numpy.isfinite(last_edge)): raise ValueError( - "supplied range of [{}, {}] is not finite".format( - first_edge, last_edge - ) + f"supplied range of [{first_edge}, {last_edge}] is not finite" ) elif a.size == 0: @@ -76,9 +112,8 @@ def _get_outer_edges(a, range): first_edge, last_edge = a.min(), a.max() if not (dpnp.isfinite(first_edge) and dpnp.isfinite(last_edge)): raise ValueError( - "autodetected range of [{}, {}] is not finite".format( - first_edge, last_edge - ) + "autodetected range of [{first_edge}, {last_edge}] " + "is not finite" ) # expand empty range to avoid divide by zero @@ -99,7 +134,7 @@ def _get_bin_edges(a, bins, range): if isinstance(bins, str): raise NotImplementedError("only integer and array bins are implemented") - elif numpy.ndim(bins) == 0: + if numpy.ndim(bins) == 0: try: n_equal_bins = operator.index(bins) except TypeError as e: @@ -151,8 +186,7 @@ def _get_bin_edges(a, bins, range): usm_type=a.usm_type, ) return bin_edges, (first_edge, last_edge, n_equal_bins) - else: - return bin_edges, None + return bin_edges, None 
def _search_sorted_inclusive(a, v): @@ -263,7 +297,7 @@ def histogram(a, bins=10, range=None, density=None, weights=None): # We set a block size, as this allows us to iterate over chunks when # computing histograms, to minimize memory usage. - BLOCK = 65536 + block_size = 65536 # The fast path uses bincount, but that only works for certain types # of weight @@ -282,14 +316,14 @@ def histogram(a, bins=10, range=None, density=None, weights=None): # Compute via cumulative histogram cum_n = dpnp.zeros_like(bin_edges, dtype=ntype) if weights is None: - for i in _range(0, len(a), BLOCK): - sa = dpnp.sort(a[i : i + BLOCK]) + for i in _range(0, len(a), block_size): + sa = dpnp.sort(a[i : i + block_size]) cum_n += _search_sorted_inclusive(sa, bin_edges) else: zero = dpnp.zeros(1, dtype=ntype) - for i in _range(0, len(a), BLOCK): - tmp_a = a[i : i + BLOCK] - tmp_w = weights[i : i + BLOCK] + for i in _range(0, len(a), block_size): + tmp_a = a[i : i + block_size] + tmp_w = weights[i : i + block_size] sorting_index = dpnp.argsort(tmp_a) sa = tmp_a[sorting_index] sw = tmp_w[sorting_index] From 41e6f549c2630ebcd110d9b30740c76987251028 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 10 Apr 2024 16:39:26 +0200 Subject: [PATCH 05/12] Added dpnp tests --- .github/workflows/conda-package.yml | 2 + doc/reference/statistics.rst | 26 +- tests/test_arraycreation.py | 1 - tests/test_histograms.py | 415 +++++++++++++++++++++++----- 4 files changed, 357 insertions(+), 87 deletions(-) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index d99c9655e263..d1462fd7e416 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -22,6 +22,7 @@ env: test_dparray.py test_copy.py test_fft.py + test_histogram.py test_linalg.py test_logic.py test_manipulation.py @@ -49,6 +50,7 @@ env: third_party/cupy/math_tests third_party/cupy/sorting_tests/test_sort.py third_party/cupy/sorting_tests/test_count.py + 
third_party/cupy/statistics_tests/test_histogram.py third_party/cupy/statistics_tests/test_meanvar.py VER_JSON_NAME: 'version.json' VER_SCRIPT1: "import json; f = open('version.json', 'r'); j = json.load(f); f.close(); " diff --git a/doc/reference/statistics.rst b/doc/reference/statistics.rst index 6a1b14db0cb5..540b6e314a62 100644 --- a/doc/reference/statistics.rst +++ b/doc/reference/statistics.rst @@ -27,13 +27,25 @@ Averages and variances dpnp.median dpnp.average dpnp.mean - dpnp.var dpnp.std + dpnp.var dpnp.nanmean dpnp.nanvar dpnp.nanstd +Correlations +------------ + +.. autosummary:: + :toctree: generated/ + :nosignatures: + + dpnp.corrcoef + dpnp.cov + dpnp.correlate + + Histograms ---------- @@ -47,15 +59,3 @@ Histograms dpnp.bincount dpnp.histogram_bin_edges dpnp.digitize - - -Correlations ------------- - -.. autosummary:: - :toctree: generated/ - :nosignatures: - - dpnp.corrcoef - dpnp.cov - dpnp.correlate diff --git a/tests/test_arraycreation.py b/tests/test_arraycreation.py index 59c8c514a04f..727aa8238c89 100644 --- a/tests/test_arraycreation.py +++ b/tests/test_arraycreation.py @@ -7,7 +7,6 @@ import pytest from numpy.testing import ( assert_allclose, - assert_almost_equal, assert_array_equal, ) diff --git a/tests/test_histograms.py b/tests/test_histograms.py index a283c5547cc6..d0cf1279f9cd 100644 --- a/tests/test_histograms.py +++ b/tests/test_histograms.py @@ -1,89 +1,358 @@ import numpy import pytest +from numpy.testing import ( + assert_, + assert_allclose, + assert_array_equal, + assert_raises, + assert_raises_regex, + suppress_warnings, +) import dpnp -from .helper import has_support_aspect64 +from .helper import ( + assert_dtype_allclose, + get_all_dtypes, + has_support_aspect64, +) class TestHistogram: - def setup(self): - pass + @pytest.mark.usefixtures("suppress_complex_warning") + @pytest.mark.parametrize( + "dtype", get_all_dtypes(no_none=True, no_bool=True) + ) + def test_rand_data(self, dtype): + n = 100 + v = 
numpy.random.rand(n).astype(dtype=dtype) + iv = dpnp.array(v, dtype=dtype) + + expected_hist, _ = numpy.histogram(v) + result_hist, _ = dpnp.histogram(iv) + assert_array_equal(result_hist, expected_hist) + + @pytest.mark.usefixtures("suppress_complex_warning") + @pytest.mark.parametrize( + "dtype", get_all_dtypes(no_none=True, no_bool=True) + ) + def test_linspace_data(self, dtype): + v = numpy.linspace(0, 10, 100, dtype=dtype) + iv = dpnp.array(v) + + expected_hist, _ = numpy.histogram(v) + result_hist, _ = dpnp.histogram(iv) + assert_array_equal(result_hist, expected_hist) + + @pytest.mark.parametrize( + "data, bins_data", + [ + pytest.param([1, 2, 3, 4], [1, 2], id="1d-1d"), + pytest.param([1, 2], 1, id="1d-0d"), + ], + ) + def test_one_bin(self, data, bins_data): + a = numpy.array(data) + bins = numpy.array(bins_data) - def teardown(self): - pass + ia = dpnp.array(a) + ibins = dpnp.array(bins) + expected_hist, expected_edges = numpy.histogram(a, bins=bins) + result_hist, result_edges = dpnp.histogram(ia, bins=ibins) + assert_array_equal(result_hist, expected_hist) + assert_array_equal(result_edges, expected_edges) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") - def test_simple(self): + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_zero_bin(self, xp): + a = xp.array([1, 2]) + assert_raises(ValueError, xp.histogram, a, bins=0) + + @pytest.mark.usefixtures("suppress_complex_warning") + @pytest.mark.parametrize( + "dtype", get_all_dtypes(no_none=True, no_bool=True) + ) + def test_density(self, dtype): n = 100 - v = dpnp.random.rand(n) - a, _ = dpnp.histogram(v) - # check if the sum of the bins equals the number of samples - numpy.testing.assert_equal(dpnp.sum(a, axis=0), n) - # check that the bin counts are evenly spaced when the data is from - # a linear function - a, _ = dpnp.histogram( - numpy.linspace( - 0, - 10, - 100, - dtype="float64" if has_support_aspect64() else "float32", - ) + v = numpy.random.rand(n).astype(dtype=dtype) + iv = 
dpnp.array(v, dtype=dtype) + + expected_hist, expected_edges = numpy.histogram(v, density=True) + result_hist, result_edges = dpnp.histogram(iv, density=True) + + if numpy.issubdtype(dtype, numpy.inexact): + tol = numpy.finfo(dtype).resolution + assert_allclose(result_hist, expected_hist, rtol=tol, atol=tol) + assert_allclose(result_edges, expected_edges, rtol=tol, atol=tol) + else: + assert_dtype_allclose(result_hist, expected_hist) + assert_dtype_allclose(result_edges, expected_edges) + + @pytest.mark.parametrize("density", [True, False]) + def test_bin_density(self, density): + bins = [0, 1, 3, 6, 10] + v = numpy.arange(10) + iv = dpnp.array(v) + + expected_hist, expected_edges = numpy.histogram( + v, bins, density=density ) - numpy.testing.assert_array_equal(a, 10) - - @pytest.mark.usefixtures("allow_fall_back_on_numpy") - def test_one_bin(self): - # Ticket 632 - hist, edges = dpnp.histogram([1, 2, 3, 4], [1, 2]) - numpy.testing.assert_array_equal( - hist, - [ - 2, - ], + result_hist, result_edges = dpnp.histogram(iv, bins, density=density) + assert_allclose(result_hist, expected_hist) + assert_array_equal(result_edges, expected_edges) + + @pytest.mark.parametrize( + "bins", [[0, 1, 3, 6, numpy.inf], [0.5, 1.5, numpy.inf]] + ) + def test_bin_inf(self, bins): + v = numpy.arange(10) + iv = dpnp.array(v) + + expected_hist, expected_edges = numpy.histogram(v, bins, density=True) + result_hist, result_edges = dpnp.histogram(iv, bins, density=True) + assert_allclose(result_hist, expected_hist) + assert_array_equal(result_edges, expected_edges) + + @pytest.mark.parametrize("range", [[0, 9], [1, 10]], ids=["lower", "upper"]) + def test_outliers(self, range): + a = numpy.arange(10) + 0.5 + ia = dpnp.array(a) + + expected_hist, expected_edges = numpy.histogram(a, range=range) + result_hist, result_edges = dpnp.histogram(ia, range=range) + assert_allclose(result_hist, expected_hist) + assert_allclose(result_edges, expected_edges) + + def 
test_outliers_normalization_weights(self): + range = [1, 9] + a = numpy.arange(10) + 0.5 + ia = dpnp.array(a) + + # Normalization + expected_hist, expected_edges = numpy.histogram(a, range, density=True) + result_hist, result_edges = dpnp.histogram(ia, range, density=True) + assert_allclose(result_hist, expected_hist) + assert_allclose(result_edges, expected_edges) + + w = numpy.arange(10) + 0.5 + iw = dpnp.array(w) + + # Weights + expected_hist, expected_edges = numpy.histogram( + a, range, weights=w, density=True ) - numpy.testing.assert_array_equal(edges, [1, 2]) - numpy.testing.assert_raises(ValueError, dpnp.histogram, [1, 2], bins=0) - h, e = dpnp.histogram([1, 2], bins=1) - numpy.testing.assert_equal(h, dpnp.array([2])) - numpy.testing.assert_allclose(e, dpnp.array([1.0, 2.0])) - - def test_density(self): - # Check that the integral of the density equals 1. - n = 100 - v = dpnp.random.rand(n) - a, b = dpnp.histogram(v, density=True) - area = dpnp.sum(a * dpnp.diff(b)[0])[0] - numpy.testing.assert_almost_equal(area, 1) + result_hist, result_edges = dpnp.histogram( + ia, range, weights=iw, density=True + ) + assert_allclose(result_hist, expected_hist) + assert_allclose(result_edges, expected_edges) + + expected_hist, expected_edges = numpy.histogram( + a, bins=8, range=range, weights=w, density=True + ) + result_hist, result_edges = dpnp.histogram( + ia, bins=8, range=range, weights=iw, density=True + ) + assert_allclose(result_hist, expected_hist) + assert_allclose(result_edges, expected_edges) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_bool_conversion(self, xp): + a = xp.array([1, 1, 0], dtype=numpy.uint8) + int_hist, int_edges = xp.histogram(a) + + with suppress_warnings() as sup: + rec = sup.record(RuntimeWarning, "Converting input from .*") + + v = xp.array([True, True, False]) + hist, edges = xp.histogram(v) + + # A warning should be issued + assert len(rec) == 1 + assert_array_equal(hist, int_hist) + assert_array_equal(edges, int_edges) 
+ + @pytest.mark.parametrize("density", [True, False]) + def test_weights(self, density): + v = numpy.random.rand(100) + w = numpy.ones(100) * 5 + + iv = dpnp.array(v) + iw = dpnp.array(w) + + expected_hist, expected_edges = numpy.histogram( + v, weights=w, density=density + ) + result_hist, result_edges = dpnp.histogram( + iv, weights=iw, density=density + ) + assert_dtype_allclose(result_hist, expected_hist) + assert_allclose(result_edges, expected_edges) + + def test_integer_weights(self): + v = numpy.array([1, 2, 2, 4]) + w = numpy.array([4, 3, 2, 1]) + + iv = dpnp.array(v) + iw = dpnp.array(w) - # Check with non-constant bin widths - v = dpnp.arange(10) + expected_hist, expected_edges = numpy.histogram(v, bins=4, weights=w) + result_hist, result_edges = dpnp.histogram(iv, bins=4, weights=iw) + assert_array_equal(result_hist, expected_hist) + assert_array_equal(result_edges, expected_edges) + + def test_weights_non_uniform_bin_widths(self): bins = [0, 1, 3, 6, 10] - a, b = dpnp.histogram(v, bins, density=True) - numpy.testing.assert_array_equal(a, 0.1) - numpy.testing.assert_equal(dpnp.sum(a * dpnp.diff(b))[0], 1) - - # Test that passing False works too - a, b = dpnp.histogram(v, bins, density=False) - numpy.testing.assert_array_equal(a, [1, 2, 3, 4]) - - # Variable bin widths are especially useful to deal with - # infinities. - v = dpnp.arange(10) - bins = [0, 1, 3, 6, numpy.inf] - a, b = dpnp.histogram(v, bins, density=True) - numpy.testing.assert_array_equal(a, [0.1, 0.1, 0.1, 0.0]) - - # Taken from a bug report from N. Becker on the numpy-discussion - # mailing list Aug. 6, 2010. 
- counts, _ = dpnp.histogram( - [1, 2, 3, 4], [0.5, 1.5, numpy.inf], density=True + v = numpy.arange(9) + w = numpy.array([2, 1, 1, 1, 1, 1, 1, 1, 1]) + + iv = dpnp.array(v) + iw = dpnp.array(w) + + expected_hist, expected_edges = numpy.histogram( + v, bins, weights=w, density=True + ) + result_hist, result_edges = dpnp.histogram( + iv, bins, weights=iw, density=True + ) + assert_allclose(result_hist, expected_hist) + assert_array_equal(result_edges, expected_edges) + + def test_weights_complex_dtype(self): + bins = [0, 2, 3] + v = numpy.array([1.3, 2.5, 2.3]) + w = numpy.array([1, -1, 2]) + 1j * numpy.array([2, 1, 2]) + + iv = dpnp.array(v) + iw = dpnp.array(w) + + # with custom bins + expected_hist, expected_edges = numpy.histogram(v, bins, weights=w) + result_hist, result_edges = dpnp.histogram(iv, bins, weights=iw) + assert_array_equal(result_hist, expected_hist) + assert_array_equal(result_edges, expected_edges) + + # with even bins + expected_hist, expected_edges = numpy.histogram( + v, bins=2, range=[1, 3], weights=w + ) + result_hist, result_edges = dpnp.histogram( + iv, bins=2, range=[1, 3], weights=iw + ) + assert_array_equal(result_hist, expected_hist) + assert_array_equal(result_edges, expected_edges) + + def test_no_side_effects(self): + v = dpnp.array([1.3, 2.5, 2.3]) + copy_v = v.copy() + + # check that ensures that values passed to ``histogram`` are unchanged + _, _ = dpnp.histogram(v, range=[-10, 10], bins=100) + assert (v == copy_v).all() + + def test_empty(self): + expected_hist, expected_edges = numpy.histogram( + numpy.array([]), bins=([0, 1]) + ) + result_hist, result_edges = dpnp.histogram( + dpnp.array([]), bins=([0, 1]) + ) + assert_array_equal(result_hist, expected_hist) + assert_array_equal(result_edges, expected_edges) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_error_binnum_type(self, xp): + vals = xp.linspace(0.0, 1.0, num=100) + + # `bins` must be an integer, a string, or an array + _, _ = xp.histogram(vals, 5) + 
assert_raises(TypeError, xp.histogram, vals, 2.4) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_finite_range(self, xp): + vals = xp.linspace(0.0, 1.0, num=100) + + # normal ranges should be fine + _, _ = xp.histogram(vals, range=[0.25, 0.75]) + assert_raises(ValueError, xp.histogram, vals, range=[xp.nan, 0.75]) + assert_raises(ValueError, xp.histogram, vals, range=[0.25, xp.inf]) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_invalid_range(self, xp): + # start of range must be < end of range + vals = xp.linspace(0.0, 1.0, num=100) + with assert_raises_regex(ValueError, "max must be larger than"): + xp.histogram(vals, range=[0.1, 0.01]) + + def test_bin_edge_cases(self): + v = dpnp.array([337, 404, 739, 806, 1007, 1811, 2012]) + + hist, edges = dpnp.histogram(v, bins=8296, range=(2, 2280)) + mask = hist > 0 + left_edges = edges[:-1][mask] + right_edges = edges[1:][mask] + + # floating-point computations correctly place edge cases + for x, left, right in zip(v, left_edges, right_edges): + assert_(x >= left) + assert_(x < right) + + @pytest.mark.skipif(not has_support_aspect64(), reason="fp64 required") + def test_last_bin_inclusive_range(self): + v = numpy.array([0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 4.0, 5.0]) + iv = dpnp.array(v) + + expected_hist, expected_edges = numpy.histogram( + v, bins=30, range=(-0.5, 5) ) - numpy.testing.assert_equal(counts, [0.25, 0]) - - @pytest.mark.usefixtures("allow_fall_back_on_numpy") - def test_arr_weights_mismatch(self): - a = dpnp.arange(10) + 0.5 - w = dpnp.arange(11) + 0.5 - with numpy.testing.assert_raises_regex(ValueError, "same shape as"): - h, b = dpnp.histogram(a, range=[1, 9], weights=w, density=True) + result_hist, result_edges = dpnp.histogram(iv, bins=30, range=(-0.5, 5)) + assert_allclose(result_hist, expected_hist) + assert_allclose(result_edges, expected_edges) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_bin_array_dims(self, xp): + # gracefully handle bins object > 1 
dimension + vals = xp.linspace(0.0, 1.0, num=100) + bins = xp.array([[0, 0.5], [0.6, 1.0]]) + with assert_raises_regex(ValueError, "must be 1d"): + xp.histogram(vals, bins=bins) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_unsigned_monotonicity_check(self, xp): + # bins must increase monotonically when bins contain unsigned values + arr = xp.array([2]) + bins = xp.array([1, 3, 1], dtype="uint64") + with assert_raises(ValueError): + _, _ = xp.histogram(arr, bins=bins) + + def test_nan_values(self): + one_nan = numpy.array([0, 1, numpy.nan]) + all_nan = numpy.array([numpy.nan, numpy.nan]) + + ione_nan = dpnp.array(one_nan) + iall_nan = dpnp.array(all_nan) + + # NaN is not counted + expected_hist, expected_edges = numpy.histogram(one_nan, bins=[0, 1]) + result_hist, result_edges = dpnp.histogram(ione_nan, bins=[0, 1]) + assert_array_equal(result_hist, expected_hist) + assert_array_equal(result_edges, expected_edges) + + # NaN is not counted + expected_hist, expected_edges = numpy.histogram(all_nan, bins=[0, 1]) + result_hist, result_edges = dpnp.histogram(iall_nan, bins=[0, 1]) + assert_array_equal(result_hist, expected_hist) + assert_array_equal(result_edges, expected_edges) + + @pytest.mark.parametrize( + "dtype", + [numpy.byte, numpy.short, numpy.intc, numpy.int_, numpy.longlong], + ) + def test_signed_overflow_bounds(self, dtype): + exponent = 8 * numpy.dtype(dtype).itemsize - 1 + v = numpy.array([-(2**exponent) + 4, 2**exponent - 4], dtype=dtype) + iv = dpnp.array(v) + + expected_hist, expected_edges = numpy.histogram(v, bins=2) + result_hist, result_edges = dpnp.histogram(iv, bins=2) + assert_array_equal(result_hist, expected_hist) + assert_allclose(result_edges, expected_edges) From 30688c3d79192f0b917d34c70cf6c336539e344b Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Mon, 15 Apr 2024 14:46:24 +0200 Subject: [PATCH 06/12] Added CFD tests --- dpnp/dpnp_iface_histograms.py | 23 ++++++++++++++--------- tests/test_sycl_queue.py | 24 
++++++++++++++++++++++++ tests/test_usm_type.py | 13 +++++++++++++ 3 files changed, 51 insertions(+), 9 deletions(-) diff --git a/dpnp/dpnp_iface_histograms.py b/dpnp/dpnp_iface_histograms.py index fe1c97e813f2..9ae996f1c1ba 100644 --- a/dpnp/dpnp_iface_histograms.py +++ b/dpnp/dpnp_iface_histograms.py @@ -59,6 +59,7 @@ def _ravel_check_a_and_weights(a, weights): # ensure that `a` array has supported type dpnp.check_supported_arrays_type(a) + usm_type = a.usm_type # ensure that the array is a "subtractable" dtype if a.dtype == dpnp.bool: @@ -73,6 +74,7 @@ def _ravel_check_a_and_weights(a, weights): if weights is not None: # check that `weights` array has supported type dpnp.check_supported_arrays_type(weights) + usm_type = dpu.get_coerced_usm_type([usm_type, weights.usm_type]) # check that arrays have the same allocation queue if dpu.get_execution_queue([a.sycl_queue, weights.sycl_queue]) is None: @@ -84,7 +86,7 @@ def _ravel_check_a_and_weights(a, weights): raise ValueError("weights should have the same shape as a.") weights = weights.ravel() a = a.ravel() - return a, weights + return a, weights, usm_type def _get_outer_edges(a, range): @@ -124,12 +126,13 @@ def _get_outer_edges(a, range): return first_edge, last_edge -def _get_bin_edges(a, bins, range): +def _get_bin_edges(a, bins, range, usm_type): """Computes the bins used internally by `histogram`.""" # parse the overloaded bins argument n_equal_bins = None bin_edges = None + sycl_queue = a.sycl_queue if isinstance(bins, str): raise NotImplementedError("only integer and array bins are implemented") @@ -154,7 +157,7 @@ def _get_bin_edges(a, bins, range): bin_edges = bins else: bin_edges = dpnp.asarray( - bins, sycl_queue=a.sycl_queue, usm_type=a.usm_type + bins, sycl_queue=sycl_queue, usm_type=usm_type ) if dpnp.any(bin_edges[:-1] > bin_edges[1:]): @@ -172,7 +175,7 @@ def _get_bin_edges(a, bins, range): bin_type = dpnp.result_type(first_edge, last_edge, a) if dpnp.issubdtype(bin_type, dpnp.integer): bin_type = 
dpnp.result_type( - bin_type, dpnp.default_float_type(sycl_queue=a.sycl_queue), a + bin_type, dpnp.default_float_type(sycl_queue=sycl_queue), a ) # bin edges must be computed @@ -182,8 +185,8 @@ def _get_bin_edges(a, bins, range): n_equal_bins + 1, endpoint=True, dtype=bin_type, - sycl_queue=a.sycl_queue, - usm_type=a.usm_type, + sycl_queue=sycl_queue, + usm_type=usm_type, ) return bin_edges, (first_edge, last_edge, n_equal_bins) return bin_edges, None @@ -285,9 +288,9 @@ def histogram(a, bins=10, range=None, density=None, weights=None): """ - a, weights = _ravel_check_a_and_weights(a, weights) + a, weights, usm_type = _ravel_check_a_and_weights(a, weights) - bin_edges, uniform_bins = _get_bin_edges(a, bins, range) + bin_edges, uniform_bins = _get_bin_edges(a, bins, range, usm_type) # Histogram is an integer or a float array depending on the weights. if weights is None: @@ -320,7 +323,9 @@ def histogram(a, bins=10, range=None, density=None, weights=None): sa = dpnp.sort(a[i : i + block_size]) cum_n += _search_sorted_inclusive(sa, bin_edges) else: - zero = dpnp.zeros(1, dtype=ntype) + zero = dpnp.zeros( + 1, dtype=ntype, sycl_queue=a.sycl_queue, usm_type=a.usm_type + ) for i in _range(0, len(a), block_size): tmp_a = a[i : i + block_size] tmp_w = weights[i : i + block_size] diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index cdd3f9f23db7..07e34bce76c7 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -2022,3 +2022,27 @@ def test_tensorsolve(device): result_queue = result.sycl_queue assert_sycl_queue_equal(result_queue, a_dp.sycl_queue) + + +@pytest.mark.parametrize("weights", [None, numpy.arange(7, 12)]) +@pytest.mark.parametrize( + "device", + valid_devices, + ids=[device.filter_string for device in valid_devices], +) +def test_histogram(weights, device): + v = numpy.arange(5) + w = weights + + iv = dpnp.array(v, device=device) + iw = None if weights is None else dpnp.array(w, sycl_queue=iv.sycl_queue) + + expected_hist, 
expected_edges = numpy.histogram(v, weights=w) + result_hist, result_edges = dpnp.histogram(iv, weights=iw) + assert_array_equal(result_hist, expected_hist) + assert_dtype_allclose(result_edges, expected_edges) + + hist_queue = result_hist.sycl_queue + edges_queue = result_edges.sycl_queue + assert_sycl_queue_equal(hist_queue, iv.sycl_queue) + assert_sycl_queue_equal(edges_queue, iv.sycl_queue) diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py index b74e927f66dd..5e643334a231 100644 --- a/tests/test_usm_type.py +++ b/tests/test_usm_type.py @@ -1170,3 +1170,16 @@ def test_tensorsolve(usm_type_a, usm_type_b): assert a.usm_type == usm_type_a assert b.usm_type == usm_type_b assert result.usm_type == du.get_coerced_usm_type([usm_type_a, usm_type_b]) + + +@pytest.mark.parametrize("usm_type_v", list_of_usm_types, ids=list_of_usm_types) +@pytest.mark.parametrize("usm_type_w", list_of_usm_types, ids=list_of_usm_types) +def test_histogram(usm_type_v, usm_type_w): + v = dp.arange(5, usm_type=usm_type_v) + w = dp.arange(7, 12, usm_type=usm_type_w) + + hist, edges = dp.histogram(v, weights=w) + assert v.usm_type == usm_type_v + assert w.usm_type == usm_type_w + assert hist.usm_type == du.get_coerced_usm_type([usm_type_v, usm_type_w]) + assert edges.usm_type == du.get_coerced_usm_type([usm_type_v, usm_type_w]) From 546602b95da3cc566588fce5d2239057211689b1 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Mon, 15 Apr 2024 15:29:43 +0200 Subject: [PATCH 07/12] Renamed test_histogram.py --- tests/{test_histograms.py => test_histogram.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{test_histograms.py => test_histogram.py} (100%) diff --git a/tests/test_histograms.py b/tests/test_histogram.py similarity index 100% rename from tests/test_histograms.py rename to tests/test_histogram.py From b2b9dcf6c6748c7989241c3c5a75a84a2e2a4876 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Tue, 16 Apr 2024 19:51:59 +0200 Subject: [PATCH 08/12] Updated tests to 
run on Iris Xe --- tests/test_histogram.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_histogram.py b/tests/test_histogram.py index d0cf1279f9cd..1e4108319b14 100644 --- a/tests/test_histogram.py +++ b/tests/test_histogram.py @@ -186,7 +186,7 @@ def test_weights(self, density): iv, weights=iw, density=density ) assert_dtype_allclose(result_hist, expected_hist) - assert_allclose(result_edges, expected_edges) + assert_dtype_allclose(result_edges, expected_edges) def test_integer_weights(self): v = numpy.array([1, 2, 2, 4]) From dd6e32285c258bb55011660ffb4f0e00717a0dc8 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Mon, 22 Apr 2024 14:01:31 +0200 Subject: [PATCH 09/12] Get rid of block size --- dpnp/dpnp_iface_histograms.py | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/dpnp/dpnp_iface_histograms.py b/dpnp/dpnp_iface_histograms.py index 9ae996f1c1ba..282a7c1e702a 100644 --- a/dpnp/dpnp_iface_histograms.py +++ b/dpnp/dpnp_iface_histograms.py @@ -298,16 +298,12 @@ def histogram(a, bins=10, range=None, density=None, weights=None): else: ntype = weights.dtype - # We set a block size, as this allows us to iterate over chunks when - # computing histograms, to minimize memory usage. 
- block_size = 65536 - # The fast path uses bincount, but that only works for certain types # of weight # simple_weights = ( # weights is None or - # np.can_cast(weights.dtype, np.double) or - # np.can_cast(weights.dtype, complex) + # dpnp.can_cast(weights.dtype, dpnp.double) or + # dpnp.can_cast(weights.dtype, complex) # ) # TODO: implement a fast path simple_weights = False @@ -317,24 +313,19 @@ def histogram(a, bins=10, range=None, density=None, weights=None): pass else: # Compute via cumulative histogram - cum_n = dpnp.zeros_like(bin_edges, dtype=ntype) if weights is None: - for i in _range(0, len(a), block_size): - sa = dpnp.sort(a[i : i + block_size]) - cum_n += _search_sorted_inclusive(sa, bin_edges) + sa = dpnp.sort(a) + cum_n = _search_sorted_inclusive(sa, bin_edges) else: zero = dpnp.zeros( 1, dtype=ntype, sycl_queue=a.sycl_queue, usm_type=a.usm_type ) - for i in _range(0, len(a), block_size): - tmp_a = a[i : i + block_size] - tmp_w = weights[i : i + block_size] - sorting_index = dpnp.argsort(tmp_a) - sa = tmp_a[sorting_index] - sw = tmp_w[sorting_index] - cw = dpnp.concatenate((zero, sw.cumsum(dtype=ntype))) - bin_index = _search_sorted_inclusive(sa, bin_edges) - cum_n += cw[bin_index] + sorting_index = dpnp.argsort(a) + sa = a[sorting_index] + sw = weights[sorting_index] + cw = dpnp.concatenate((zero, sw.cumsum(dtype=ntype))) + bin_index = _search_sorted_inclusive(sa, bin_edges) + cum_n = cw[bin_index] n = dpnp.diff(cum_n) From b6312e3ba862a34084f2a6025a7c1abab94d2c85 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 26 Apr 2024 14:45:28 +0200 Subject: [PATCH 10/12] Addressed review comments --- dpnp/dpnp_iface_histograms.py | 5 +++-- tests/test_histogram.py | 35 ++++++++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/dpnp/dpnp_iface_histograms.py b/dpnp/dpnp_iface_histograms.py index 282a7c1e702a..71bb47a1fd8f 100644 --- a/dpnp/dpnp_iface_histograms.py +++ b/dpnp/dpnp_iface_histograms.py @@ -114,7 +114,7 
@@ def _get_outer_edges(a, range): first_edge, last_edge = a.min(), a.max() if not (dpnp.isfinite(first_edge) and dpnp.isfinite(last_edge)): raise ValueError( - "autodetected range of [{first_edge}, {last_edge}] " + f"autodetected range of [{first_edge}, {last_edge}] " "is not finite" ) @@ -135,6 +135,7 @@ def _get_bin_edges(a, bins, range, usm_type): sycl_queue = a.sycl_queue if isinstance(bins, str): + # TODO: implement support of string bins raise NotImplementedError("only integer and array bins are implemented") if numpy.ndim(bins) == 0: @@ -318,7 +319,7 @@ def histogram(a, bins=10, range=None, density=None, weights=None): cum_n = _search_sorted_inclusive(sa, bin_edges) else: zero = dpnp.zeros( - 1, dtype=ntype, sycl_queue=a.sycl_queue, usm_type=a.usm_type + 1, dtype=ntype, sycl_queue=a.sycl_queue, usm_type=usm_type ) sorting_index = dpnp.argsort(a) sa = a[sorting_index] diff --git a/tests/test_histogram.py b/tests/test_histogram.py index 1e4108319b14..ebe7b4715dff 100644 --- a/tests/test_histogram.py +++ b/tests/test_histogram.py @@ -1,3 +1,4 @@ +import dpctl import numpy import pytest from numpy.testing import ( @@ -283,6 +284,18 @@ def test_invalid_range(self, xp): with assert_raises_regex(ValueError, "max must be larger than"): xp.histogram(vals, range=[0.1, 0.01]) + @pytest.mark.parametrize("xp", [numpy, dpnp]) + @pytest.mark.parametrize("inf_val", [-numpy.inf, numpy.inf]) + def test_infinite_edge(self, xp, inf_val): + v = xp.array([0.5, 1.5, inf_val]) + min, max = v.min(), v.max() + + # both first and last ranges must be finite + with assert_raises_regex( + ValueError, f"autodetected range of \[{min}, {max}\] is not finite" + ): + xp.histogram(v) + def test_bin_edge_cases(self): v = dpnp.array([337, 404, 739, 806, 1007, 1811, 2012]) @@ -322,7 +335,7 @@ def test_unsigned_monotonicity_check(self, xp): arr = xp.array([2]) bins = xp.array([1, 3, 1], dtype="uint64") with assert_raises(ValueError): - _, _ = xp.histogram(arr, bins=bins) + xp.histogram(arr, 
bins=bins) def test_nan_values(self): one_nan = numpy.array([0, 1, numpy.nan]) @@ -356,3 +369,23 @@ def test_signed_overflow_bounds(self, dtype): result_hist, result_edges = dpnp.histogram(iv, bins=2) assert_array_equal(result_hist, expected_hist) assert_allclose(result_edges, expected_edges) + + def test_string_bins_not_implemented(self): + v = dpnp.arange(5) + + # numpy support string bins, but not dpnp + _, _ = numpy.histogram(v.asnumpy(), bins="auto") + with assert_raises(NotImplementedError): + dpnp.histogram(v, bins="auto") + + def test_bins_another_sycl_queue(self): + v = dpnp.arange(7, 12, sycl_queue=dpctl.SyclQueue()) + bins = dpnp.arange(4, sycl_queue=dpctl.SyclQueue()) + with assert_raises(ValueError): + dpnp.histogram(v, bins=bins) + + def test_weights_another_sycl_queue(self): + v = dpnp.arange(5, sycl_queue=dpctl.SyclQueue()) + w = dpnp.arange(7, 12, sycl_queue=dpctl.SyclQueue()) + with assert_raises(ValueError): + dpnp.histogram(v, weights=w) From 422912beaf5ee6c6a07102479df87d59cc80177c Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Mon, 29 Apr 2024 12:25:30 +0200 Subject: [PATCH 11/12] Leftovers from rebase --- tests/skipped_tests_gpu.tbl | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index a72c538a52db..4688c1787fc1 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -81,9 +81,6 @@ tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsPois tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsPoisson_param_2_{lam_shape=(3, 2), shape=(4, 3, 2)}::test_poisson tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsPoisson_param_3_{lam_shape=(3, 2), shape=(3, 2)}::test_poisson -tests/third_party/cupy/core_tests/test_ndarray_conversion.py::TestNdarrayItemRaise_param_0_{shape=(0,)}::test_item -tests/third_party/cupy/core_tests/test_ndarray_conversion.py::TestNdarrayItemRaise_param_1_{shape=(2, 
3)}::test_item -tests/third_party/cupy/core_tests/test_ndarray_conversion.py::TestNdarrayItemRaise_param_2_{shape=(1, 0, 1)}::test_item tests/third_party/cupy/core_tests/test_ndarray_conversion.py::TestNdarrayToBytes_param_0_{shape=()}::test_item tests/third_party/cupy/core_tests/test_ndarray_conversion.py::TestNdarrayToBytes_param_1_{shape=(1,)}::test_item tests/third_party/cupy/core_tests/test_ndarray_conversion.py::TestNdarrayToBytes_param_2_{shape=(2, 3)}::test_item From f14499bce1326812364b5ebe92e2097cef931f82 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Mon, 29 Apr 2024 13:12:10 +0200 Subject: [PATCH 12/12] Cast bin edges to default floating type if density is enabled --- dpnp/dpnp_iface_histograms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpnp/dpnp_iface_histograms.py b/dpnp/dpnp_iface_histograms.py index 71bb47a1fd8f..ce8c0ff90a55 100644 --- a/dpnp/dpnp_iface_histograms.py +++ b/dpnp/dpnp_iface_histograms.py @@ -331,7 +331,7 @@ def histogram(a, bins=10, range=None, density=None, weights=None): n = dpnp.diff(cum_n) if density: - db = dpnp.diff(bin_edges) + db = dpnp.diff(bin_edges).astype(dpnp.default_float_type()) return n / db / n.sum(), bin_edges return n, bin_edges