From 3582e234ace3db7ff1dd8a83a4480eca68a0075f Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Mon, 23 Jan 2023 14:04:30 -0600 Subject: [PATCH 1/6] Add wrapper classes for Range and NdRange. --- numba_dpex/core/kernel_interface/indexers.py | 155 +++++++++++++++++++ 1 file changed, 155 insertions(+) create mode 100644 numba_dpex/core/kernel_interface/indexers.py diff --git a/numba_dpex/core/kernel_interface/indexers.py b/numba_dpex/core/kernel_interface/indexers.py new file mode 100644 index 0000000000..00879dc320 --- /dev/null +++ b/numba_dpex/core/kernel_interface/indexers.py @@ -0,0 +1,155 @@ +# SPDX-FileCopyrightText: 2022 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + + +class Range: + """Defines an 1, 2, or 3 dimensional index space over which a kernel is + executed. + + The Range class is analogous to SYCL's ``sycl::range`` class. + """ + + def __init__(self, dim0, dim1=None, dim2=None): + self._dim0 = dim0 + self._dim1 = dim1 + self._dim2 = dim2 + + if not self._dim0: + raise ValueError("Outermost dimension of a Range cannot be None.") + + if self._dim2 and not self._dim1: + raise ValueError( + "A 3rd dimension cannot be specified if a 2nd dimension " + "was not specified." + ) + + if not isinstance(self._dim0, int): + raise ValueError( + "The size of a dimension needs to be specified as an " + "integer value." + ) + + if self._dim1 and not isinstance(self._dim1, int): + raise ValueError( + "The size of a dimension needs to be specified as an " + "integer value." + ) + + if self._dim2 and not isinstance(self._dim2, int): + raise ValueError( + "The size of a dimension needs to be specified as an " + "integer value." + ) + + def get(self, dim): + """Returns the size of the Range in a given dimension.""" + if not isinstance(dim, int): + raise ValueError( + "The dimension needs to be specified as an integer value." 
+ ) + + if dim == 0: + return self._dim0 + elif dim == 1: + return self._dim1 + elif dim == 2: + return self._dim2 + else: + raise ValueError( + "Unsupported dimension number. A Range " + "only has 1, 2, or 3 dimensions." + ) + + @property + def size(self): + """Returns cummulative size of the Range.""" + size = self._dim0 + if self._dim1: + size *= self._dim1 + if self._dim2: + size *= self._dim2 + + return size + + @property + def rank(self): + """Returns the rank (dimensionality) of the Range.""" + rank = 1 + + # We already checked in init that if dim2 is set that dim1 has + # to be set as well + if self._dim1: + rank += 1 + elif self._dim2: + rank += 1 + + return rank + + +class NdRange: + """Defines the iteration domain of both the work-groups and the overall + dispatch. + + The nd_range comprises two ranges: the whole range over which the kernel is + to be executed (global_size), and the range of each work group (local_size). + """ + + def _check_ndrange(self): + """Checks if the specified nd_range (global_range, local_range) are + valid. + """ + if len(self._local_range) != len(self._global_range): + raise ValueError( + "Global and local ranges should have same number of dimensions." + ) + + for i in range(len(self._global_range)): + if self._global_range[i] % self._local_range[i] != 0: + raise ValueError( + "The global work groups must be evenly divisible by the" + " local work items evenly." + ) + + def _set_range(self, range): + normalized_range = None + if isinstance(range, int): + normalized_range = Range(range) + elif isinstance(range, tuple) or isinstance(range, list): + if len(range) == 1: + normalized_range = Range(dim0=range[0]) + elif len(range == 2): + normalized_range = Range(dim0=range[0], dim1=range[1]) + elif len(range == 3): + normalized_range = Range( + dim0=range[0], + dim1=range[1], + dim2=range[2], + ) + else: + raise ValueError( + "A Range cannot have more than three dimensions." 
+ ) + return normalized_range + + def __init__(self, *, global_range, local_range) -> None: + if global_range is None: + raise ValueError("Global range cannot be None.") + if local_range is None: + raise ValueError("Local range cannot be None.") + + self._global_range = self._set_range(global_range) + self._local_range = self._set_range(local_range) + + # check if the ndrange is sane + self._check_ndrange() + + @property + def global_range(self): + """Return the constituent global range.""" + return self._global_size + + @property + def local_range(self): + """Return the constituent local range.""" + return self._local_size From 1e72a2100d3591ecc78392cf7bac53f309c8cb82 Mon Sep 17 00:00:00 2001 From: "akmkhale@ansatnuc04" Date: Tue, 24 Jan 2023 16:27:18 -0600 Subject: [PATCH 2/6] Fix kernel lauch parameter syntax --- .../core/kernel_interface/dispatcher.py | 140 +++++++++++++----- .../kernel_tests/test_kernel_launch_params.py | 1 + 2 files changed, 100 insertions(+), 41 deletions(-) diff --git a/numba_dpex/core/kernel_interface/dispatcher.py b/numba_dpex/core/kernel_interface/dispatcher.py index 50480c2c33..b8c1af1201 100644 --- a/numba_dpex/core/kernel_interface/dispatcher.py +++ b/numba_dpex/core/kernel_interface/dispatcher.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 +from collections.abc import Iterable from inspect import signature from warnings import warn @@ -441,6 +442,56 @@ def _determine_kernel_launch_queue(self, args, argtypes): else: raise ExecutionQueueInferenceError(self.kernel_name) + def _raise_invalid_kernel_enqueue_args(self): + error_message = ( + "Incorrect number of arguments for enqueuing numba_dpex.kernel. " + "Usage: device_env, global size, local size. " + "The local size argument is optional." 
+ ) + raise InvalidKernelLaunchArgsError(error_message) + + def _ensure_valid_work_item_grid(self, val): + if not isinstance(val, (tuple, list, int)): + error_message = ( + "Cannot create work item dimension from provided argument" + ) + raise ValueError(error_message) + + if isinstance(val, int): + val = [val] + + # TODO: we need some way to check the max dimensions + """ + if len(val) > device_env.get_max_work_item_dims(): + error_message = ("Unsupported number of work item dimensions ") + raise ValueError(error_message) + """ + + return list( + val[::-1] + ) # reversing due to sycl and opencl interop kernel range mismatch semantic + + def _ensure_valid_work_group_size(self, val, work_item_grid): + if not isinstance(val, (tuple, list, int)): + error_message = ( + "Cannot create work item dimension from provided argument" + ) + raise ValueError(error_message) + + if isinstance(val, int): + val = [val] + + if len(val) != len(work_item_grid): + error_message = ( + "Unsupported number of work item dimensions, " + + "dimensions of global and local work items has to be the same " + ) + raise IllegalRangeValueError(error_message) + + return list( + val[::-1] + ) # reversing due to sycl and opencl interop kernel range mismatch semantic + def __getitem__(self, args): """Mimic's ``numba.cuda`` square-bracket notation for configuring the global_range and local_range settings when launching a kernel on a @@ -468,51 +519,58 @@ def __getitem__(self, args): global_range and local_range attributes initialized. 
""" - if isinstance(args, int): - self._global_range = [args] + # print("args =", args) + + if ( + isinstance(args, tuple) + and len(args) == 2 + and isinstance(args[0], int) + and isinstance(args[1], int) + ): + # print("----------> here") + # print("args =", args) + self._global_range = list(args) + # print("self._global_range =", self._global_range) self._local_range = None - elif isinstance(args, tuple) or isinstance(args, list): - if len(args) == 1 and all(isinstance(v, int) for v in args): - self._global_range = list(args) - self._local_range = None - elif len(args) == 2: - gr = args[0] - lr = args[1] - if isinstance(gr, int): - self._global_range = [gr] - elif len(gr) != 0 and all(isinstance(v, int) for v in gr): - self._global_range = list(gr) - else: - raise IllegalRangeValueError(kernel_name=self.kernel_name) - - if isinstance(lr, int): - self._local_range = [lr] - elif isinstance(lr, list) and len(lr) == 0: - # deprecation warning - warn( - "Specifying the local range as an empty list " - "(DEFAULT_LOCAL_SIZE) is deprecated. The kernel will " - "be executed as a basic data-parallel kernel over the " - "global range. 
Specify a valid local range to execute " - "the kernel as an ND-range kernel.", - DeprecationWarning, - stacklevel=2, - ) - self._local_range = None - elif len(lr) != 0 and all(isinstance(v, int) for v in lr): - self._local_range = list(lr) - else: - raise IllegalRangeValueError(kernel_name=self.kernel_name) + # print("self._local_range =", self._local_range) + return self + + if not isinstance(args, Iterable): + args = [args] + + ls = None + nargs = len(args) + # print("nargs =", nargs) + # Check if the kernel enquing arguments are sane + if nargs < 1 or nargs > 2: + self._raise_invalid_kernel_enqueue_args() + + # sycl_queue = dpctl.get_current_queue() + + gs = self._ensure_valid_work_item_grid(args[0]) + # If the optional local size argument is provided + if nargs == 2: + if args[1] != []: + ls = self._ensure_valid_work_group_size(args[1], gs) else: - raise InvalidKernelLaunchArgsError(kernel_name=self.kernel_name) + warn( + "Empty local_range calls will be deprecated in the future.", + DeprecationWarning, + ) + + self._global_range = list(gs)[::-1] + if ls: + self._local_range = list(ls)[::-1] else: - raise InvalidKernelLaunchArgsError(kernel_name=self.kernel_name) + self._local_range = None + + # print("self._global_range =", self._global_range) + # print("self._local_range =", self._local_range) - # FIXME:[::-1] is done as OpenCL and SYCl have different orders when - # it comes to specifying dimensions. - self._global_range = list(self._global_range)[::-1] - if self._local_range: - self._local_range = list(self._local_range)[::-1] + if self._global_range == [] and self._local_range is None: + raise IllegalRangeValueError( + "Illegal range values for kernel launch parameters." 
+ ) return self diff --git a/numba_dpex/tests/kernel_tests/test_kernel_launch_params.py b/numba_dpex/tests/kernel_tests/test_kernel_launch_params.py index fa7658623d..4cfd597d83 100644 --- a/numba_dpex/tests/kernel_tests/test_kernel_launch_params.py +++ b/numba_dpex/tests/kernel_tests/test_kernel_launch_params.py @@ -37,6 +37,7 @@ def test_1D_global_range_as_list(): assert k._local_range is None +@pytest.mark.xfail def test_1D_global_range_and_1D_local_range(): k = vecadd[10, 10] assert k._global_range == [10] From d0e46f2a25d0dc9966e0e7bb52fbf57a928f1615 Mon Sep 17 00:00:00 2001 From: "akmkhale@ansatnuc04" Date: Tue, 24 Jan 2023 21:25:24 -0600 Subject: [PATCH 3/6] Fixed kernel lauch params and added NdRange in Jitkernel --- .../core/kernel_interface/dispatcher.py | 100 +++++++++--------- numba_dpex/core/kernel_interface/indexers.py | 4 +- .../kernel_tests/test_kernel_launch_params.py | 51 +++++++-- 3 files changed, 94 insertions(+), 61 deletions(-) diff --git a/numba_dpex/core/kernel_interface/dispatcher.py b/numba_dpex/core/kernel_interface/dispatcher.py index b8c1af1201..d3af41f327 100644 --- a/numba_dpex/core/kernel_interface/dispatcher.py +++ b/numba_dpex/core/kernel_interface/dispatcher.py @@ -5,7 +5,7 @@ from collections.abc import Iterable from inspect import signature -from warnings import warn +from warnings import simplefilter, warn import dpctl import dpctl.program as dpctl_prog @@ -32,9 +32,12 @@ UnsupportedWorkItemSizeError, ) from numba_dpex.core.kernel_interface.arg_pack_unpacker import Packer +from numba_dpex.core.kernel_interface.indexers import NdRange from numba_dpex.core.kernel_interface.spirv_kernel import SpirvKernel from numba_dpex.core.types import USMNdArray +simplefilter("always", DeprecationWarning) + def get_ordered_arg_access_types(pyfunc, access_types): """Deprecated and to be removed in next release.""" @@ -519,59 +522,56 @@ def __getitem__(self, args): global_range and local_range attributes initialized. 
""" - # print("args =", args) - if ( - isinstance(args, tuple) - and len(args) == 2 - and isinstance(args[0], int) - and isinstance(args[1], int) - ): - # print("----------> here") - # print("args =", args) - self._global_range = list(args) - # print("self._global_range =", self._global_range) - self._local_range = None - # print("self._local_range =", self._local_range) - return self - - if not isinstance(args, Iterable): - args = [args] - - ls = None - nargs = len(args) - # print("nargs =", nargs) - # Check if the kernel enquing arguments are sane - if nargs < 1 or nargs > 2: - self._raise_invalid_kernel_enqueue_args() - - # sycl_queue = dpctl.get_current_queue() - - gs = self._ensure_valid_work_item_grid(args[0]) - # If the optional local size argument is provided - if nargs == 2: - if args[1] != []: - ls = self._ensure_valid_work_group_size(args[1], gs) - else: + if isinstance(args, NdRange): + self._global_range = list(args.global_range)[::-1] + self._local_range = list(args.local_range)[::-1] + else: + if ( + isinstance(args, tuple) + and len(args) == 2 + and isinstance(args[0], int) + and isinstance(args[1], int) + ): warn( - "Empty local_range calls will be deprecated in the future.", + "Ambiguous kernel launch paramters. " + + "If your data have dimensions > 1, " + + "include a default/empty local_range. " + + "i.e. [(M,N), numba_dpex.DEFAULT_LOCAL_RANGE](), " + + "otherwise your code might produce erroneous results.", DeprecationWarning, ) - - self._global_range = list(gs)[::-1] - if ls: - self._local_range = list(ls)[::-1] - else: - self._local_range = None - - # print("self._global_range =", self._global_range) - # print("self._local_range =", self._local_range) - - if self._global_range == [] and self._local_range is None: - raise IllegalRangeValueError( - "Illegal range values for kernel launch parameters." 
- ) - + self._global_range = [args[0]] + self._local_range = [args[1]] + return self + + if not isinstance(args, Iterable): + args = [args] + + ls = None + nargs = len(args) + # Check if the kernel enquing arguments are sane + if nargs < 1 or nargs > 2: + self._raise_invalid_kernel_enqueue_args() + + gs = self._ensure_valid_work_item_grid(args[0]) + # If the optional local size argument is provided + if nargs == 2: + if args[1] != []: + ls = self._ensure_valid_work_group_size(args[1], gs) + else: + warn( + "Empty local_range calls will be deprecated in the future.", + DeprecationWarning, + ) + + self._global_range = list(gs)[::-1] + self._local_range = list(ls)[::-1] if ls else None + + if self._global_range == [] and self._local_range is None: + raise IllegalRangeValueError( + "Illegal range values for kernel launch parameters." + ) return self def _check_ranges(self, device): diff --git a/numba_dpex/core/kernel_interface/indexers.py b/numba_dpex/core/kernel_interface/indexers.py index 00879dc320..d4196d9f0e 100644 --- a/numba_dpex/core/kernel_interface/indexers.py +++ b/numba_dpex/core/kernel_interface/indexers.py @@ -147,9 +147,9 @@ def __init__(self, *, global_range, local_range) -> None: @property def global_range(self): """Return the constituent global range.""" - return self._global_size + return self._global_range @property def local_range(self): """Return the constituent local range.""" - return self._local_size + return self._local_range diff --git a/numba_dpex/tests/kernel_tests/test_kernel_launch_params.py b/numba_dpex/tests/kernel_tests/test_kernel_launch_params.py index 4cfd597d83..82ba24d477 100644 --- a/numba_dpex/tests/kernel_tests/test_kernel_launch_params.py +++ b/numba_dpex/tests/kernel_tests/test_kernel_launch_params.py @@ -2,12 +2,15 @@ # # SPDX-License-Identifier: Apache-2.0 +import dpctl +import dpctl.tensor as dpt import pytest import numba_dpex as dpex from numba_dpex.core.exceptions import ( IllegalRangeValueError, 
InvalidKernelLaunchArgsError, + UnknownGlobalRangeError, ) @@ -37,20 +40,13 @@ def test_1D_global_range_as_list(): assert k._local_range is None -@pytest.mark.xfail def test_1D_global_range_and_1D_local_range(): - k = vecadd[10, 10] - assert k._global_range == [10] - assert k._local_range == [10] - - -def test_1D_global_range_and_1D_local_range2(): k = vecadd[[10, 10]] assert k._global_range == [10] assert k._local_range == [10] -def test_1D_global_range_and_1D_local_range3(): +def test_1D_global_range_and_1D_local_range2(): k = vecadd[(10,), (10,)] assert k._global_range == [10] assert k._local_range == [10] @@ -94,12 +90,45 @@ def test_deprecation_warning_for_empty_local_range2(): assert k._local_range is None +def test_ambiguous_kernel_launch_params(): + with pytest.deprecated_call(): + k = vecadd[10, 10] + assert k._global_range == [10] + assert k._local_range == [10] + + with pytest.deprecated_call(): + k = vecadd[(10, 10)] + assert k._global_range == [10] + assert k._local_range == [10] + + with pytest.deprecated_call(): + k = vecadd[((10), (10))] + assert k._global_range == [10] + assert k._local_range == [10] + + +def test_unknown_global_range_error(): + device = dpctl.select_default_device() + a = dpt.ones(10, dtype=dpt.int16, device=device) + b = dpt.ones(10, dtype=dpt.int16, device=device) + c = dpt.zeros(10, dtype=dpt.int16, device=device) + try: + vecadd(a, b, c) + except UnknownGlobalRangeError as e: + assert "No global range" in e.message + + def test_illegal_kernel_launch_arg(): + with pytest.raises(InvalidKernelLaunchArgsError): + vecadd[()] + + +def test_illegal_kernel_launch_arg2(): with pytest.raises(InvalidKernelLaunchArgsError): vecadd[10, 10, []] -def test_illegal_range_error(): +def test_illegal_range_error1(): with pytest.raises(IllegalRangeValueError): vecadd[[], []] @@ -112,3 +141,7 @@ def test_illegal_range_error2(): def test_illegal_range_error3(): with pytest.raises(IllegalRangeValueError): vecadd[(), 10] + + +if __name__ == 
"__main__": + test_unknown_global_range_error() From e66c0fc39b621a224af99b95cbf4c1dfa5570a45 Mon Sep 17 00:00:00 2001 From: "akmkhale@ansatnuc04" Date: Tue, 24 Jan 2023 23:39:59 -0600 Subject: [PATCH 4/6] Getting rid of core.kernel_interface.indexers.NdRange, using core.kernel_interface.utils.Ranges instead. Fix in docstrings --- .../core/kernel_interface/dispatcher.py | 4 +- numba_dpex/core/kernel_interface/indexers.py | 155 ------------------ numba_dpex/core/kernel_interface/utils.py | 146 +++++++++++++++++ numba_dpex/examples/kernel/vector_sum2D.py | 2 +- .../kernel_tests/test_ndrange_exceptions.py | 16 +- 5 files changed, 156 insertions(+), 167 deletions(-) delete mode 100644 numba_dpex/core/kernel_interface/indexers.py create mode 100644 numba_dpex/core/kernel_interface/utils.py diff --git a/numba_dpex/core/kernel_interface/dispatcher.py b/numba_dpex/core/kernel_interface/dispatcher.py index d3af41f327..909fdabb10 100644 --- a/numba_dpex/core/kernel_interface/dispatcher.py +++ b/numba_dpex/core/kernel_interface/dispatcher.py @@ -32,8 +32,8 @@ UnsupportedWorkItemSizeError, ) from numba_dpex.core.kernel_interface.arg_pack_unpacker import Packer -from numba_dpex.core.kernel_interface.indexers import NdRange from numba_dpex.core.kernel_interface.spirv_kernel import SpirvKernel +from numba_dpex.core.kernel_interface.utils import Ranges from numba_dpex.core.types import USMNdArray simplefilter("always", DeprecationWarning) @@ -523,7 +523,7 @@ def __getitem__(self, args): """ - if isinstance(args, NdRange): + if isinstance(args, Ranges): self._global_range = list(args.global_range)[::-1] self._local_range = list(args.local_range)[::-1] else: diff --git a/numba_dpex/core/kernel_interface/indexers.py b/numba_dpex/core/kernel_interface/indexers.py deleted file mode 100644 index d4196d9f0e..0000000000 --- a/numba_dpex/core/kernel_interface/indexers.py +++ /dev/null @@ -1,155 +0,0 @@ -# SPDX-FileCopyrightText: 2022 Intel Corporation -# -# SPDX-License-Identifier: 
Apache-2.0 - - -class Range: - """Defines an 1, 2, or 3 dimensional index space over which a kernel is - executed. - - The Range class is analogous to SYCL's ``sycl::range`` class. - """ - - def __init__(self, dim0, dim1=None, dim2=None): - self._dim0 = dim0 - self._dim1 = dim1 - self._dim2 = dim2 - - if not self._dim0: - raise ValueError("Outermost dimension of a Range cannot be None.") - - if self._dim2 and not self._dim1: - raise ValueError( - "A 3rd dimension cannot be specified if a 2nd dimension " - "was not specified." - ) - - if not isinstance(self._dim0, int): - raise ValueError( - "The size of a dimension needs to be specified as an " - "integer value." - ) - - if self._dim1 and not isinstance(self._dim1, int): - raise ValueError( - "The size of a dimension needs to be specified as an " - "integer value." - ) - - if self._dim2 and not isinstance(self._dim2, int): - raise ValueError( - "The size of a dimension needs to be specified as an " - "integer value." - ) - - def get(self, dim): - """Returns the size of the Range in a given dimension.""" - if not isinstance(dim, int): - raise ValueError( - "The dimension needs to be specified as an integer value." - ) - - if dim == 0: - return self._dim0 - elif dim == 1: - return self._dim1 - elif dim == 2: - return self._dim2 - else: - raise ValueError( - "Unsupported dimension number. A Range " - "only has 1, 2, or 3 dimensions." - ) - - @property - def size(self): - """Returns cummulative size of the Range.""" - size = self._dim0 - if self._dim1: - size *= self._dim1 - if self._dim2: - size *= self._dim2 - - return size - - @property - def rank(self): - """Returns the rank (dimensionality) of the Range.""" - rank = 1 - - # We already checked in init that if dim2 is set that dim1 has - # to be set as well - if self._dim1: - rank += 1 - elif self._dim2: - rank += 1 - - return rank - - -class NdRange: - """Defines the iteration domain of both the work-groups and the overall - dispatch. 
- - The nd_range comprises two ranges: the whole range over which the kernel is - to be executed (global_size), and the range of each work group (local_size). - """ - - def _check_ndrange(self): - """Checks if the specified nd_range (global_range, local_range) are - valid. - """ - if len(self._local_range) != len(self._global_range): - raise ValueError( - "Global and local ranges should have same number of dimensions." - ) - - for i in range(len(self._global_range)): - if self._global_range[i] % self._local_range[i] != 0: - raise ValueError( - "The global work groups must be evenly divisible by the" - " local work items evenly." - ) - - def _set_range(self, range): - normalized_range = None - if isinstance(range, int): - normalized_range = Range(range) - elif isinstance(range, tuple) or isinstance(range, list): - if len(range) == 1: - normalized_range = Range(dim0=range[0]) - elif len(range == 2): - normalized_range = Range(dim0=range[0], dim1=range[1]) - elif len(range == 3): - normalized_range = Range( - dim0=range[0], - dim1=range[1], - dim2=range[2], - ) - else: - raise ValueError( - "A Range cannot have more than three dimensions." 
- ) - return normalized_range - - def __init__(self, *, global_range, local_range) -> None: - if global_range is None: - raise ValueError("Global range cannot be None.") - if local_range is None: - raise ValueError("Local range cannot be None.") - - self._global_range = self._set_range(global_range) - self._local_range = self._set_range(local_range) - - # check if the ndrange is sane - self._check_ndrange() - - @property - def global_range(self): - """Return the constituent global range.""" - return self._global_range - - @property - def local_range(self): - """Return the constituent local range.""" - return self._local_range diff --git a/numba_dpex/core/kernel_interface/utils.py b/numba_dpex/core/kernel_interface/utils.py new file mode 100644 index 0000000000..61a1434afa --- /dev/null +++ b/numba_dpex/core/kernel_interface/utils.py @@ -0,0 +1,146 @@ +class Ranges: + """A data structure to encapsulate kernel lauch parameters. + + This is just a wrapper class on top of tuples. The kernel + launch parameter is consisted of two int's (or two tuples of int's). + The first value is called `global_range` and the second value + is called `local_range`. + + The `global_range` is analogous to DPC++'s "global size" + and the `local_range` is analogous to DPC++'s "workgroup size", + respectively. + """ + + def __init__(self, global_range, local_range=None): + """Constructor for Ranges. + + Args: + global_range (tuple or int): An int or a tuple of int's + to specify DPC++'s global size. + local_range (tuple, optional): An int or a tuple of int's + to specify DPC++'s workgroup size. Defaults to None. + """ + self._global_range = global_range + self._local_range = local_range + self._check_sanity() + + def _check_sanity(self): + """Sanity checks for the global and local range tuples. + + Raises: + ValueError: If the length of global_range is more than 3, if tuple. + ValueError: If each of value global_range is not an int, if tuple. 
+ ValueError: If the global_range is not a tuple or an int. + ValueError: If the length of local_range is more than 3, if tuple. + ValueError: If the dimensions of local_range + and global_range are not same, if tuples. + ValueError: If each of value local_range is not an int, if tuple. + ValueError: If the range limits in the global_range is not + divisible by the range limit in the local_range + at the corresponding dimension. + ValueError: If the local_range is not a tuple or an int. + """ + if isinstance(self._global_range, tuple): + if len(self._global_range) > 3: + raise ValueError( + "The maximum allowed dimension for global_range is 3." + ) + for i in range(len(self._global_range)): + if not isinstance(self._global_range[i], int): + raise ValueError("The range limit values must be an int.") + elif isinstance(self._global_range, int): + self._global_range = tuple([self._global_range]) + else: + raise ValueError("global_range must be a tuple or an int.") + if self._local_range: + if isinstance(self._local_range, tuple): + if len(self._local_range) > 3: + raise ValueError( + "The maximum allowed dimension for local_range is 3." + ) + if len(self._global_range) != len(self._local_range): + raise ValueError( + "global_range and local_range must " + + "have the same dimensions." + ) + for i in range(len(self._local_range)): + if not isinstance(self._local_range[i], int): + raise ValueError( + "The range limit values must be an int." + ) + if self._global_range[i] % self._local_range[i] != 0: + raise ValueError( + "Each limit in global_range must be divisible " + + "by each limit in local_range at " + + " the corresponding dimension." + ) + elif isinstance(self._local_range, int): + self._local_range = tuple([self._local_range]) + else: + raise ValueError("local_range must be a tuple or an int.") + + @property + def global_range(self): + """global_range accessor. 
+ + Returns: + tuple: global_range + """ + return self._global_range + + @property + def local_range(self): + """local_range accessor. + + Returns: + tuple: local_range + """ + return self._local_range + + def __str__(self) -> str: + """str() function for this class. + + Returns: + str: str representation of a Ranges object. + """ + return ( + "(" + str(self._global_range) + ", " + str(self._local_range) + ")" + ) + + def __repr__(self) -> str: + """repr() function for this class. + + Returns: + str: str representation of a Ranges object. + """ + return self.__str__() + + +# tester +if __name__ == "__main__": + ranges = Ranges(1) + print(ranges) + + ranges = Ranges(1, 1) + print(ranges) + + ranges = Ranges((2, 2, 2), (1, 1, 1)) + print(ranges) + + ranges = Ranges((2, 2, 2)) + print(ranges) + + try: + ranges = Ranges((1, 1, 1, 1)) + except Exception as e: + print(e) + + try: + ranges = Ranges((2, 2, 2), (1, 1)) + except Exception as e: + print(e) + + try: + ranges = Ranges((3, 3, 3), (2, 2, 2)) + except Exception as e: + print(e) diff --git a/numba_dpex/examples/kernel/vector_sum2D.py b/numba_dpex/examples/kernel/vector_sum2D.py index 089721b7c1..33b7384982 100644 --- a/numba_dpex/examples/kernel/vector_sum2D.py +++ b/numba_dpex/examples/kernel/vector_sum2D.py @@ -22,7 +22,7 @@ def data_parallel_sum(a, b, c): def driver(a, b, c, global_size): - data_parallel_sum[global_size](a, b, c) + data_parallel_sum[global_size, dpex.DEFAULT_LOCAL_SIZE](a, b, c) def main(): diff --git a/numba_dpex/tests/kernel_tests/test_ndrange_exceptions.py b/numba_dpex/tests/kernel_tests/test_ndrange_exceptions.py index 9211a5366b..5da4cd6e45 100644 --- a/numba_dpex/tests/kernel_tests/test_ndrange_exceptions.py +++ b/numba_dpex/tests/kernel_tests/test_ndrange_exceptions.py @@ -5,10 +5,7 @@ import pytest import numba_dpex as ndpx -from numba_dpex.core.exceptions import ( - UnmatchedNumberOfRangeDimsError, - UnsupportedGroupWorkItemSizeError, -) +from numba_dpex.core.kernel_interface.utils 
import Ranges # Data parallel kernel implementing vector sum @@ -19,13 +16,13 @@ def kernel_vector_sum(a, b, c): @pytest.mark.parametrize( - "error, ndrange", + "error, ranges", [ - (UnmatchedNumberOfRangeDimsError, ((2, 2), (1, 1, 1))), - (UnsupportedGroupWorkItemSizeError, ((3, 3, 3), (2, 2, 2))), + (ValueError, ((2, 2), (1, 1, 1))), + (ValueError, ((3, 3, 3), (2, 2, 2))), ], ) -def test_ndrange_config_error(error, ndrange): +def test_ndrange_config_error(error, ranges): """Test if a exception is raised when calling a ndrange kernel with unspported arguments. """ @@ -35,4 +32,5 @@ def test_ndrange_config_error(error, ndrange): c = dpt.zeros(1024, dtype=dpt.int64) with pytest.raises(error): - kernel_vector_sum[ndrange](a, b, c) + range = Ranges(ranges[0], ranges[1]) + kernel_vector_sum[range](a, b, c) From 64b81a909efa712df0be90877f828e9f9a780321 Mon Sep 17 00:00:00 2001 From: "akmkhale@ansatnuc04" Date: Wed, 25 Jan 2023 20:43:25 -0600 Subject: [PATCH 5/6] A better implementation for the Range/NdRange class, with examples Better deprecation warnings added Following exact sycl::range/nd_range specification for kernel launch parameters Default cache size is set to 128, like numba We don't need simplefilter --- numba_dpex/config.py | 2 +- .../core/kernel_interface/dispatcher.py | 126 ++++----- numba_dpex/core/kernel_interface/utils.py | 254 +++++++++++------- .../examples/kernel/kernel_specialization.py | 2 + numba_dpex/examples/kernel/vector_sum2D.py | 11 + .../kernel_tests/test_kernel_launch_params.py | 8 +- .../kernel_tests/test_ndrange_exceptions.py | 8 +- 7 files changed, 238 insertions(+), 173 deletions(-) diff --git a/numba_dpex/config.py b/numba_dpex/config.py index 7fc121d059..146cb35301 100644 --- a/numba_dpex/config.py +++ b/numba_dpex/config.py @@ -99,7 +99,7 @@ def __getattr__(name): ENABLE_CACHE = _readenv("NUMBA_DPEX_ENABLE_CACHE", int, 1) # Capacity of the cache, execute it like: # NUMBA_DPEX_CACHE_SIZE=20 python -CACHE_SIZE =
_readenv("NUMBA_DPEX_CACHE_SIZE", int, 10) +CACHE_SIZE = _readenv("NUMBA_DPEX_CACHE_SIZE", int, 128) TESTING_SKIP_NO_DPNP = _readenv("NUMBA_DPEX_TESTING_SKIP_NO_DPNP", int, 0) TESTING_SKIP_NO_DEBUGGING = _readenv( diff --git a/numba_dpex/core/kernel_interface/dispatcher.py b/numba_dpex/core/kernel_interface/dispatcher.py index 909fdabb10..feb6ba6bbc 100644 --- a/numba_dpex/core/kernel_interface/dispatcher.py +++ b/numba_dpex/core/kernel_interface/dispatcher.py @@ -5,7 +5,7 @@ from collections.abc import Iterable from inspect import signature -from warnings import simplefilter, warn +from warnings import warn import dpctl import dpctl.program as dpctl_prog @@ -33,11 +33,9 @@ ) from numba_dpex.core.kernel_interface.arg_pack_unpacker import Packer from numba_dpex.core.kernel_interface.spirv_kernel import SpirvKernel -from numba_dpex.core.kernel_interface.utils import Ranges +from numba_dpex.core.kernel_interface.utils import NdRange, Range from numba_dpex.core.types import USMNdArray -simplefilter("always", DeprecationWarning) - def get_ordered_arg_access_types(pyfunc, access_types): """Deprecated and to be removed in next release.""" @@ -445,56 +443,6 @@ def _determine_kernel_launch_queue(self, args, argtypes): else: raise ExecutionQueueInferenceError(self.kernel_name) - def _raise_invalid_kernel_enqueue_args(self): - error_message = ( - "Incorrect number of arguments for enqueuing numba_dpex.kernel. " - "Usage: device_env, global size, local size. " - "The local size argument is optional." 
- ) - raise InvalidKernelLaunchArgsError(error_message) - - def _ensure_valid_work_item_grid(self, val): - if not isinstance(val, (tuple, list, int)): - error_message = ( - "Cannot create work item dimension from provided argument" - ) - raise ValueError(error_message) - - if isinstance(val, int): - val = [val] - - # TODO: we need some way to check the max dimensions - """ - if len(val) > device_env.get_max_work_item_dims(): - error_message = ("Unsupported number of work item dimensions ") - raise ValueError(error_message) - """ - - return list( - val[::-1] - ) # reversing due to sycl and opencl interop kernel range mismatch semantic - - def _ensure_valid_work_group_size(self, val, work_item_grid): - if not isinstance(val, (tuple, list, int)): - error_message = ( - "Cannot create work item dimension from provided argument" - ) - raise ValueError(error_message) - - if isinstance(val, int): - val = [val] - - if len(val) != len(work_item_grid): - error_message = ( - "Unsupported number of work item dimensions, " - + "dimensions of global and local work items has to be the same " - ) - raise IllegalRangeValueError(error_message) - - return list( - val[::-1] - ) # reversing due to sycl and opencl interop kernel range mismatch semantic - def __getitem__(self, args): """Mimic's ``numba.cuda`` square-bracket notation for configuring the global_range and local_range settings when launching a kernel on a @@ -522,8 +470,11 @@ def __getitem__(self, args): global_range and local_range attributes initialized. """ - - if isinstance(args, Ranges): + if isinstance(args, Range): + # we need inversions, see github issue #889 + self._global_range = list(args)[::-1] + elif isinstance(args, NdRange): + # we need inversions, see github issue #889 self._global_range = list(args.global_range)[::-1] self._local_range = list(args.local_range)[::-1] else: @@ -534,44 +485,73 @@ def __getitem__(self, args): and isinstance(args[1], int) ): warn( - "Ambiguous kernel launch paramters. 
" - + "If your data have dimensions > 1, " - + "include a default/empty local_range. " - + "i.e. [(M,N), numba_dpex.DEFAULT_LOCAL_RANGE](), " + "Ambiguous kernel launch paramters. If your data have " + + "dimensions > 1, include a default/empty local_range:\n" + + " [(X,Y), numba_dpex.DEFAULT_LOCAL_RANGE]()\n" + "otherwise your code might produce erroneous results.", DeprecationWarning, + stacklevel=2, ) self._global_range = [args[0]] self._local_range = [args[1]] return self - if not isinstance(args, Iterable): - args = [args] + warn( + "The current syntax for specification of kernel lauch " + + "parameters is deprecated. Users should set the kernel " + + "parameters through Range/NdRange classes.\n" + + "Example:\n" + + " from numba_dpex.core.kernel_interface.utils import Range,NdRange\n\n" + + " # for global range only\n" + + " [Range(X,Y)]()\n" + + " # or,\n" + + " # for both global and local ranges\n" + + " [NdRange((X,Y), (P,Q))]()", + DeprecationWarning, + stacklevel=2, + ) - ls = None + args = [args] if not isinstance(args, Iterable) else args nargs = len(args) + # Check if the kernel enquing arguments are sane if nargs < 1 or nargs > 2: - self._raise_invalid_kernel_enqueue_args() + raise InvalidKernelLaunchArgsError(kernel_name=self.kernel_name) - gs = self._ensure_valid_work_item_grid(args[0]) + g_range = ( + [args[0]] if not isinstance(args[0], Iterable) else args[0] + ) # If the optional local size argument is provided + l_range = None if nargs == 2: if args[1] != []: - ls = self._ensure_valid_work_group_size(args[1], gs) + l_range = ( + [args[1]] + if not isinstance(args[1], Iterable) + else args[1] + ) else: warn( - "Empty local_range calls will be deprecated in the future.", + "Empty local_range calls are deprecated. 
Please use Range/NdRange " + + "to specify the kernel launch parameters:\n" + + "Example:\n" + + " from numba_dpex.core.kernel_interface.utils import Range,NdRange\n\n" + + " # for global range only\n" + + " [Range(X,Y)]()\n" + + " # or,\n" + + " # for both global and local ranges\n" + + " [NdRange((X,Y), (P,Q))]()", DeprecationWarning, + stacklevel=2, ) - self._global_range = list(gs)[::-1] - self._local_range = list(ls)[::-1] if ls else None + if len(g_range) < 1: + raise IllegalRangeValueError(kernel_name=self.kernel_name) + + # we need inversions, see github issue #889 + self._global_range = list(g_range)[::-1] + self._local_range = list(l_range)[::-1] if l_range else None - if self._global_range == [] and self._local_range is None: - raise IllegalRangeValueError( - "Illegal range values for kernel launch parameters." - ) return self def _check_ranges(self, device): diff --git a/numba_dpex/core/kernel_interface/utils.py b/numba_dpex/core/kernel_interface/utils.py index 61a1434afa..55431cc2bb 100644 --- a/numba_dpex/core/kernel_interface/utils.py +++ b/numba_dpex/core/kernel_interface/utils.py @@ -1,146 +1,218 @@ -class Ranges: - """A data structure to encapsulate kernel lauch parameters. +from collections.abc import Iterable - This is just a wrapper class on top of tuples. The kernel - launch parameter is consisted of two int's (or two tuples of int's). - The first value is called `global_range` and the second value - is called `local_range`. - The `global_range` is analogous to DPC++'s "global size" - and the `local_range` is analogous to DPC++'s "workgroup size", - respectively. +class Range(tuple): + """A data structure to encapsulate a single kernel lauch parameter. + + The range is an abstraction that describes the number of elements + in each dimension of buffers and index spaces. It can contain + 1, 2, or 3 numbers, dependending on the dimensionality of the + object it describes. + + This is just a wrapper class on top of a 3-tuple. 
The kernel launch + parameter is consisted of three int's. This class basically mimics + the behavior of `sycl::range`. """ - def __init__(self, global_range, local_range=None): - """Constructor for Ranges. + def __new__(cls, dim0, dim1=None, dim2=None): + """Constructs a 1, 2, or 3 dimensional range. Args: - global_range (tuple or int): An int or a tuple of int's - to specify DPC++'s global size. - local_range (tuple, optional): An int or a tuple of int's - to specify DPC++'s workgroup size. Defaults to None. + dim0 (int): The range of the first dimension. + dim1 (int, optional): The range of second dimension. + Defaults to None. + dim2 (int, optional): The range of the third dimension. + Defaults to None. + + Raises: + TypeError: If dim0 is not an int. + TypeError: If dim1 is not an int. + TypeError: If dim2 is not an int. """ - self._global_range = global_range - self._local_range = local_range - self._check_sanity() + if not isinstance(dim0, int): + raise TypeError("dim0 of a Range must be an int.") + _values = [dim0] + if dim1: + if not isinstance(dim1, int): + raise TypeError("dim1 of a Range must be an int.") + _values.append(dim1) + if dim2: + if not isinstance(dim2, int): + raise TypeError("dim2 of a Range must be an int.") + _values.append(dim2) + return super(Range, cls).__new__(cls, tuple(_values)) + + def get(self, index): + """Returns the range of a single dimension. - def _check_sanity(self): - """Sanity checks for the global and local range tuples. + Args: + index (int): The index of the dimension, i.e. [0,2] - Raises: - ValueError: If the length of global_range is more than 3, if tuple. - ValueError: If each of value global_range is not an int, if tuple. - ValueError: If the global_range is not a tuple or an int. - ValueError: If the length of local_range is more than 3, if tuple. - ValueError: If the dimensions of local_range - and global_range are not same, if tuples. - ValueError: If each of value local_range is not an int, if tuple. 
- ValueError: If the range limits in the global_range is not - divisible by the range limit in the local_range - at the corresponding dimension. - ValueError: If the local_range is not a tuple or an int. + Returns: + int: The range of the dimension indexed by `index`. + """ + return self[index] + + def size(self): + """Returns the size of a range. + + Returns the size of a range by multiplying + the range of the individual dimensions. + + Returns: + int: The size of a range. """ - if isinstance(self._global_range, tuple): - if len(self._global_range) > 3: - raise ValueError( - "The maximum allowed dimension for global_range is 3." - ) - for i in range(len(self._global_range)): - if not isinstance(self._global_range[i], int): - raise ValueError("The range limit values must be an int.") - elif isinstance(self._global_range, int): - self._global_range = tuple([self._global_range]) + n = len(self) + if n > 2: + return self[0] * self[1] * self[2] + elif n > 1: + return self[0] * self[1] else: - raise ValueError("global_range must be a tuple or an int.") - if self._local_range: - if isinstance(self._local_range, tuple): - if len(self._local_range) > 3: - raise ValueError( - "The maximum allowed dimension for local_range is 3." - ) - if len(self._global_range) != len(self._local_range): - raise ValueError( - "global_range and local_range must " - + "have the same dimensions." - ) - for i in range(len(self._local_range)): - if not isinstance(self._local_range[i], int): - raise ValueError( - "The range limit values must be an int." - ) - if self._global_range[i] % self._local_range[i] != 0: - raise ValueError( - "Each limit in global_range must be divisible " - + "by each limit in local_range at " - + " the corresponding dimension." 
- ) - elif isinstance(self._local_range, int): - self._local_range = tuple([self._local_range]) - else: - raise ValueError("local_range must be a tuple or an int.") + return self[0] + + +class NdRange: + """A class to encapsulate all kernel launch parameters. + + The NdRange defines the index space for a work group as well as + the global index space. It is passed to parallel_for to execute + a kernel on a set of work items. + + This class basically contains two Range objects, one for the global_range + and the other for the local_range. The global_range parameter contains + the global index space and the local_range parameter contains the index + space of a work group. This class mimics the behavior of `sycl::nd_range` + class. + """ + + def __init__(self, global_size, local_size): + """Constructor for NdRange class. + + Args: + global_size (Range or tuple of int's): The values for + the global_range. TypeError is raised if it is neither a Range nor an iterable. + local_size (Range or tuple of int's): The values for + the local_range. TypeError is raised if it is neither a Range nor an iterable. + """ + if isinstance(global_size, Range): + self._global_range = global_size + elif isinstance(global_size, Iterable): + self._global_range = Range(*global_size) + else: + raise TypeError("Unknown argument type for NdRange global_size.") + + if isinstance(local_size, Range): + self._local_range = local_size + elif isinstance(local_size, Iterable): + self._local_range = Range(*local_size) + else: + raise TypeError("Unknown argument type for NdRange local_size.") @property def global_range(self): - """global_range accessor. + """Accessor for global_range. Returns: - tuple: global_range + Range: The `global_range` `Range` object. """ return self._global_range @property def local_range(self): - """local_range accessor. + """Accessor for local_range. + + Returns: + Range: The `local_range` `Range` object. + """ + return self._local_range + + def get_global_range(self): + """Returns a Range defining the index space. + + Returns: + Range: A `Range` object defining the index space.
+ """ + return self._global_range + + def get_local_range(self): + """Returns a Range defining the index space of a work group. Returns: - tuple: local_range + Range: A `Range` object to specify index space of a work group. """ return self._local_range - def __str__(self) -> str: - """str() function for this class. + def __str__(self): + """str() function for NdRange class. Returns: - str: str representation of a Ranges object. + str: str representation for NdRange class. """ return ( "(" + str(self._global_range) + ", " + str(self._local_range) + ")" ) - def __repr__(self) -> str: - """repr() function for this class. + def __repr__(self): + """repr() function for NdRange class. Returns: - str: str representation of a Ranges object. + str: str representation for NdRange class. """ return self.__str__() -# tester if __name__ == "__main__": - ranges = Ranges(1) - print(ranges) + r1 = Range(1) + print("r1 =", r1) - ranges = Ranges(1, 1) - print(ranges) + r2 = Range(1, 2) + print("r2 =", r2) - ranges = Ranges((2, 2, 2), (1, 1, 1)) - print(ranges) + r3 = Range(1, 2, 3) + print("r3 =", r3, ", len(r3) =", len(r3)) - ranges = Ranges((2, 2, 2)) - print(ranges) + r3 = Range(*(1, 2, 3)) + print("r3 =", r3, ", len(r3) =", len(r3)) + r3 = Range(*[1, 2, 3]) + print("r3 =", r3, ", len(r3) =", len(r3)) + + print("r1.get(0) =", r1.get(0)) try: - ranges = Ranges((1, 1, 1, 1)) + print("r2.get(2) =", r2.get(2)) except Exception as e: print(e) + print("r3.get(0) =", r3.get(0)) + print("r3.get(1) =", r3.get(1)) + + print("r1[0] =", r1[0]) try: - ranges = Ranges((2, 2, 2), (1, 1)) + print("r2[2] =", r2[2]) except Exception as e: print(e) + print("r3[0] =", r3[0]) + print("r3[1] =", r3[1]) + try: - ranges = Ranges((3, 3, 3), (2, 2, 2)) + r4 = Range(1, 2, 3, 4) except Exception as e: print(e) + + try: + r5 = Range(*(1, 2, 3, 4)) + except Exception as e: + print(e) + + ndr1 = NdRange(Range(1, 2)) + print("ndr1 =", ndr1) + + ndr2 = NdRange(Range(1, 2), Range(1, 1, 1)) + print("ndr2 =", ndr2) 
+ + ndr3 = NdRange((1, 2)) + print("ndr3 =", ndr3) + + ndr4 = NdRange((1, 2), (1, 1, 1)) + print("ndr4 =", ndr4) diff --git a/numba_dpex/examples/kernel/kernel_specialization.py b/numba_dpex/examples/kernel/kernel_specialization.py index a3cd7fa759..8e46949bd9 100644 --- a/numba_dpex/examples/kernel/kernel_specialization.py +++ b/numba_dpex/examples/kernel/kernel_specialization.py @@ -128,3 +128,5 @@ def data_parallel_sum2(a, b, c): "strings." ) print(e) + +print("Done...") diff --git a/numba_dpex/examples/kernel/vector_sum2D.py b/numba_dpex/examples/kernel/vector_sum2D.py index 33b7384982..4e38f0bc4b 100644 --- a/numba_dpex/examples/kernel/vector_sum2D.py +++ b/numba_dpex/examples/kernel/vector_sum2D.py @@ -9,6 +9,7 @@ import numpy as np import numba_dpex as dpex +from numba_dpex.core.kernel_interface.utils import NdRange, Range @dpex.kernel @@ -25,6 +26,11 @@ def driver(a, b, c, global_size): data_parallel_sum[global_size, dpex.DEFAULT_LOCAL_SIZE](a, b, c) +def driver_with_range(a, b, c, global_size): + ranges = Range(*global_size) + data_parallel_sum[ranges](a, b, c) + + def main(): # Array dimensions X = 8 @@ -48,8 +54,13 @@ def main(): print("Using device ...") device.print_device_info() + print("Running kernel ...") driver(a_dpt, b_dpt, c_dpt, global_size) + c_out = dpt.asnumpy(c_dpt) + assert np.allclose(c, c_out) + print("Running kernel with the new lanuch parameter syntax ...") + driver_with_range(a_dpt, b_dpt, c_dpt, global_size) c_out = dpt.asnumpy(c_dpt) assert np.allclose(c, c_out) diff --git a/numba_dpex/tests/kernel_tests/test_kernel_launch_params.py b/numba_dpex/tests/kernel_tests/test_kernel_launch_params.py index 82ba24d477..4e6d697329 100644 --- a/numba_dpex/tests/kernel_tests/test_kernel_launch_params.py +++ b/numba_dpex/tests/kernel_tests/test_kernel_launch_params.py @@ -40,7 +40,7 @@ def test_1D_global_range_as_list(): assert k._local_range is None -def test_1D_global_range_and_1D_local_range(): +def test_1D_global_range_and_1D_local_range1(): 
k = vecadd[[10, 10]] assert k._global_range == [10] assert k._local_range == [10] @@ -52,7 +52,7 @@ def test_1D_global_range_and_1D_local_range2(): assert k._local_range == [10] -def test_2D_global_range_and_2D_local_range(): +def test_2D_global_range_and_2D_local_range1(): k = vecadd[(10, 10), (10, 10)] assert k._global_range == [10, 10] assert k._local_range == [10, 10] @@ -76,7 +76,7 @@ def test_2D_global_range_and_2D_local_range4(): assert k._local_range == [10, 10] -def test_deprecation_warning_for_empty_local_range(): +def test_deprecation_warning_for_empty_local_range1(): with pytest.deprecated_call(): k = vecadd[[10, 10], []] assert k._global_range == [10, 10] @@ -118,7 +118,7 @@ def test_unknown_global_range_error(): assert "No global range" in e.message -def test_illegal_kernel_launch_arg(): +def test_illegal_kernel_launch_arg1(): with pytest.raises(InvalidKernelLaunchArgsError): vecadd[()] diff --git a/numba_dpex/tests/kernel_tests/test_ndrange_exceptions.py b/numba_dpex/tests/kernel_tests/test_ndrange_exceptions.py index 5da4cd6e45..aa4e9e33b6 100644 --- a/numba_dpex/tests/kernel_tests/test_ndrange_exceptions.py +++ b/numba_dpex/tests/kernel_tests/test_ndrange_exceptions.py @@ -5,7 +5,7 @@ import pytest import numba_dpex as ndpx -from numba_dpex.core.kernel_interface.utils import Ranges +from numba_dpex.core.kernel_interface.utils import NdRange # Data parallel kernel implementing vector sum @@ -18,8 +18,8 @@ def kernel_vector_sum(a, b, c): @pytest.mark.parametrize( "error, ranges", [ - (ValueError, ((2, 2), (1, 1, 1))), - (ValueError, ((3, 3, 3), (2, 2, 2))), + (TypeError, ((2, 2), ("a", 1, 1))), + (TypeError, ((3, 3, 3, 3), (2, 2, 2))), ], ) def test_ndrange_config_error(error, ranges): @@ -32,5 +32,5 @@ def test_ndrange_config_error(error, ranges): c = dpt.zeros(1024, dtype=dpt.int64) with pytest.raises(error): - range = Ranges(ranges[0], ranges[1]) + range = NdRange(ranges[0], ranges[1]) kernel_vector_sum[range](a, b, c) From 
57c7d108be0b6d7cbdacefb62e342a2bf9937667 Mon Sep 17 00:00:00 2001 From: "akmkhale@ansatnuc04" Date: Mon, 30 Jan 2023 10:13:24 -0600 Subject: [PATCH 6/6] Updating all examples to work with Range/NdRange --- numba_dpex/examples/debug/dpex_func.py | 3 ++- numba_dpex/examples/debug/side-by-side-2.py | 3 ++- numba_dpex/examples/debug/side-by-side.py | 3 ++- numba_dpex/examples/debug/simple_dpex_func.py | 3 ++- numba_dpex/examples/debug/simple_sum.py | 3 ++- numba_dpex/examples/debug/sum.py | 3 ++- numba_dpex/examples/debug/sum_local_vars.py | 3 ++- numba_dpex/examples/debug/sum_local_vars_revive.py | 3 ++- numba_dpex/examples/kernel/atomic_op.py | 3 ++- numba_dpex/examples/kernel/black_scholes.py | 5 ++++- numba_dpex/examples/kernel/device_func.py | 11 ++++++----- numba_dpex/examples/kernel/interpolation.py | 11 ++++++++--- .../examples/kernel/kernel_private_memory.py | 7 ++++--- .../examples/kernel/kernel_specialization.py | 9 +++++---- numba_dpex/examples/kernel/matmul.py | 7 ++++--- numba_dpex/examples/kernel/pairwise_distance.py | 7 ++++--- numba_dpex/examples/kernel/scan.py | 3 ++- .../examples/kernel/select_device_for_kernel.py | 5 +++-- numba_dpex/examples/kernel/sum_reduction_ocl.py | 7 ++++--- .../examples/kernel/sum_reduction_recursive_ocl.py | 11 +++++++---- numba_dpex/examples/kernel/vector_sum.py | 3 ++- numba_dpex/examples/kernel/vector_sum2D.py | 14 ++------------ numba_dpex/examples/sum_reduction.py | 3 ++- 23 files changed, 75 insertions(+), 55 deletions(-) diff --git a/numba_dpex/examples/debug/dpex_func.py b/numba_dpex/examples/debug/dpex_func.py index bc095c65af..4acc97d763 100644 --- a/numba_dpex/examples/debug/dpex_func.py +++ b/numba_dpex/examples/debug/dpex_func.py @@ -6,6 +6,7 @@ import numpy as np import numba_dpex as dpex +from numba_dpex.core.kernel_interface.utils import Range @dpex.func(debug=True) @@ -24,7 +25,7 @@ def driver(a, b, c, global_size): print("a = ", a) print("b = ", b) print("c = ", c) - kernel_sum[global_size, 
dpex.DEFAULT_LOCAL_SIZE](a, b, c) + kernel_sum[Range(global_size)](a, b, c) print("a + b = ", c) diff --git a/numba_dpex/examples/debug/side-by-side-2.py b/numba_dpex/examples/debug/side-by-side-2.py index 119a6fd7dc..4c9797a856 100644 --- a/numba_dpex/examples/debug/side-by-side-2.py +++ b/numba_dpex/examples/debug/side-by-side-2.py @@ -9,6 +9,7 @@ import numpy as np import numba_dpex as dpex +from numba_dpex.core.kernel_interface.utils import Range def common_loop_body(i, a, b): @@ -50,7 +51,7 @@ def numba_func_driver(a, b, c): def dpex_func_driver(a, b, c): device = dpctl.select_default_device() with dpctl.device_context(device): - kernel[len(c), dpex.DEFAULT_LOCAL_SIZE](a, b, c) + kernel[Range(len(c))](a, b, c) @dpex.kernel(debug=True) diff --git a/numba_dpex/examples/debug/side-by-side.py b/numba_dpex/examples/debug/side-by-side.py index d915c1c886..9f7c0db66a 100644 --- a/numba_dpex/examples/debug/side-by-side.py +++ b/numba_dpex/examples/debug/side-by-side.py @@ -9,6 +9,7 @@ import numpy as np import numba_dpex as dpex +from numba_dpex.core.kernel_interface.utils import Range def common_loop_body(param_a, param_b): @@ -48,7 +49,7 @@ def numba_func_driver(a, b, c): def dpex_func_driver(a, b, c): device = dpctl.select_default_device() with dpctl.device_context(device): - kernel[len(c), dpex.DEFAULT_LOCAL_SIZE](a, b, c) + kernel[Range(len(c))](a, b, c) @dpex.kernel(debug=True) diff --git a/numba_dpex/examples/debug/simple_dpex_func.py b/numba_dpex/examples/debug/simple_dpex_func.py index fbd57349d8..976430dd11 100644 --- a/numba_dpex/examples/debug/simple_dpex_func.py +++ b/numba_dpex/examples/debug/simple_dpex_func.py @@ -6,6 +6,7 @@ import numpy as np import numba_dpex as dpex +from numba_dpex.core.kernel_interface.utils import Range @dpex.func(debug=True) @@ -27,6 +28,6 @@ def kernel_sum(a_in_kernel, b_in_kernel, c_in_kernel): device = dpctl.select_default_device() with dpctl.device_context(device): - kernel_sum[global_size, dpex.DEFAULT_LOCAL_SIZE](a, b, c) 
+ kernel_sum[Range(global_size)](a, b, c) print("Done...") diff --git a/numba_dpex/examples/debug/simple_sum.py b/numba_dpex/examples/debug/simple_sum.py index 5a0ea67e0f..e55d1328ff 100644 --- a/numba_dpex/examples/debug/simple_sum.py +++ b/numba_dpex/examples/debug/simple_sum.py @@ -6,6 +6,7 @@ import numpy as np import numba_dpex as dpex +from numba_dpex.core.kernel_interface.utils import Range @dpex.kernel(debug=True) @@ -23,6 +24,6 @@ def data_parallel_sum(a, b, c): device = dpctl.select_default_device() with dpctl.device_context(device): - data_parallel_sum[global_size, dpex.DEFAULT_LOCAL_SIZE](a, b, c) + data_parallel_sum[Range(global_size)](a, b, c) print("Done...") diff --git a/numba_dpex/examples/debug/sum.py b/numba_dpex/examples/debug/sum.py index ec44fff306..72cca927b2 100644 --- a/numba_dpex/examples/debug/sum.py +++ b/numba_dpex/examples/debug/sum.py @@ -6,6 +6,7 @@ import numpy as np import numba_dpex as dpex +from numba_dpex.core.kernel_interface.utils import Range @dpex.kernel(debug=True) @@ -20,7 +21,7 @@ def driver(a, b, c, global_size): print("before : ", a) print("before : ", b) print("before : ", c) - data_parallel_sum[global_size, dpex.DEFAULT_LOCAL_SIZE](a, b, c) + data_parallel_sum[Range(global_size)](a, b, c) print("after : ", c) diff --git a/numba_dpex/examples/debug/sum_local_vars.py b/numba_dpex/examples/debug/sum_local_vars.py index 72ec60a1fb..3d3b2a9a9c 100644 --- a/numba_dpex/examples/debug/sum_local_vars.py +++ b/numba_dpex/examples/debug/sum_local_vars.py @@ -6,6 +6,7 @@ import numpy as np import numba_dpex as dpex +from numba_dpex.core.kernel_interface.utils import Range @dpex.kernel(debug=True) @@ -25,6 +26,6 @@ def data_parallel_sum(a, b, c): device = dpctl.select_default_device() with dpctl.device_context(device): - data_parallel_sum[global_size, dpex.DEFAULT_LOCAL_SIZE](a, b, c) + data_parallel_sum[Range(global_size)](a, b, c) print("Done...") diff --git a/numba_dpex/examples/debug/sum_local_vars_revive.py 
b/numba_dpex/examples/debug/sum_local_vars_revive.py index f50e22f663..386d54ee77 100644 --- a/numba_dpex/examples/debug/sum_local_vars_revive.py +++ b/numba_dpex/examples/debug/sum_local_vars_revive.py @@ -6,6 +6,7 @@ import numpy as np import numba_dpex as dpex +from numba_dpex.core.kernel_interface.utils import Range @dpex.func @@ -31,6 +32,6 @@ def data_parallel_sum(a, b, c): device = dpctl.select_default_device() with dpctl.device_context(device): - data_parallel_sum[global_size, dpex.DEFAULT_LOCAL_SIZE](a, b, c) + data_parallel_sum[Range(global_size)](a, b, c) print("Done...") diff --git a/numba_dpex/examples/kernel/atomic_op.py b/numba_dpex/examples/kernel/atomic_op.py index 2e10f7cc18..653fbc15fe 100644 --- a/numba_dpex/examples/kernel/atomic_op.py +++ b/numba_dpex/examples/kernel/atomic_op.py @@ -5,6 +5,7 @@ import dpnp as np import numba_dpex as ndpex +from numba_dpex.core.kernel_interface.utils import Range @ndpex.kernel @@ -20,7 +21,7 @@ def main(): print("Using device ...") print(a.device) - atomic_reduction[N](a) + atomic_reduction[Range(N)](a) print("Reduction sum =", a[0]) print("Done...") diff --git a/numba_dpex/examples/kernel/black_scholes.py b/numba_dpex/examples/kernel/black_scholes.py index 3f6e9c5bd6..75be6d3b0d 100644 --- a/numba_dpex/examples/kernel/black_scholes.py +++ b/numba_dpex/examples/kernel/black_scholes.py @@ -8,6 +8,7 @@ import dpnp as np import numba_dpex as ndpx +from numba_dpex.core.kernel_interface.utils import Range # Stock price range S0L = 10.0 @@ -94,7 +95,9 @@ def main(): print("Using device ...") print(price.device) - kernel_black_scholes[NOPT](price, strike, t, rate, volatility, call, put) + kernel_black_scholes[Range(NOPT)]( + price, strike, t, rate, volatility, call, put + ) print("Call:", call) print("Put:", put) diff --git a/numba_dpex/examples/kernel/device_func.py b/numba_dpex/examples/kernel/device_func.py index 1c6fe52d39..80089a70fb 100644 --- a/numba_dpex/examples/kernel/device_func.py +++ 
b/numba_dpex/examples/kernel/device_func.py @@ -6,6 +6,7 @@ import numba_dpex as ndpex from numba_dpex import float32, int32, int64 +from numba_dpex.core.kernel_interface.utils import Range # Array size N = 10 @@ -69,7 +70,7 @@ def test1(): print("A=", a) try: - a_kernel_function[N](a, b) + a_kernel_function[Range(N)](a, b) except Exception as err: print(err) print("B=", b) @@ -87,7 +88,7 @@ def test2(): print("A=", a) try: - a_kernel_function_int32[N](a, b) + a_kernel_function_int32[Range(N)](a, b) except Exception as err: print(err) print("B=", b) @@ -105,7 +106,7 @@ def test3(): print("A=", a) try: - a_kernel_function_int32_float32[N](a, b) + a_kernel_function_int32_float32[Range(N)](a, b) except Exception as err: print(err) print("B=", b) @@ -119,7 +120,7 @@ def test3(): print("A=", a) try: - a_kernel_function_int32_float32[N](a, b) + a_kernel_function_int32_float32[Range(N)](a, b) except Exception as err: print(err) print("B=", b) @@ -134,7 +135,7 @@ def test3(): print("A=", a) try: - a_kernel_function_int32_float32[N](a, b) + a_kernel_function_int32_float32[Range(N)](a, b) except Exception as err: print(err) print("B=", b) diff --git a/numba_dpex/examples/kernel/interpolation.py b/numba_dpex/examples/kernel/interpolation.py index 7568ad60e7..3aa3c91765 100644 --- a/numba_dpex/examples/kernel/interpolation.py +++ b/numba_dpex/examples/kernel/interpolation.py @@ -7,6 +7,7 @@ from numpy.testing import assert_almost_equal import numba_dpex as ndpex +from numba_dpex.core.kernel_interface.utils import NdRange, Range # Interpolation domain XLO = 10.0 @@ -114,9 +115,13 @@ def main(): print("Using device ...") print(xp.device) - global_range = (N_POINTS // N_POINTS_PER_WORK_ITEM,) - local_range = (LOCAL_SIZE,) - kernel_polynomial[global_range, local_range](xp, yp, COEFFICIENTS) + global_range = Range( + N_POINTS // N_POINTS_PER_WORK_ITEM, + ) + local_range = Range( + LOCAL_SIZE, + ) + kernel_polynomial[NdRange(global_range, local_range)](xp, yp, COEFFICIENTS) # Copy 
results back to the host nyp = np.asnumpy(yp) diff --git a/numba_dpex/examples/kernel/kernel_private_memory.py b/numba_dpex/examples/kernel/kernel_private_memory.py index 089f8b41d4..3219281f7c 100644 --- a/numba_dpex/examples/kernel/kernel_private_memory.py +++ b/numba_dpex/examples/kernel/kernel_private_memory.py @@ -8,6 +8,7 @@ from numba import float32 import numba_dpex +from numba_dpex.core.kernel_interface.utils import NdRange, Range def private_memory(): @@ -39,9 +40,9 @@ def private_memory_kernel(A): print("Using device ...") device.print_device_info() - global_range = (N,) - local_range = (N,) - private_memory_kernel[global_range, local_range](arr) + global_range = Range(N) + local_range = Range(N) + private_memory_kernel[NdRange(global_range, local_range)](arr) arr_out = dpt.asnumpy(arr) np.testing.assert_allclose(orig * 2, arr_out) diff --git a/numba_dpex/examples/kernel/kernel_specialization.py b/numba_dpex/examples/kernel/kernel_specialization.py index 8e46949bd9..e1aff12c23 100644 --- a/numba_dpex/examples/kernel/kernel_specialization.py +++ b/numba_dpex/examples/kernel/kernel_specialization.py @@ -11,6 +11,7 @@ InvalidKernelSpecializationError, MissingSpecializationError, ) +from numba_dpex.core.kernel_interface.utils import Range # Similar to Numba, numba-dpex supports eager compilation of functions. 
The # following examples demonstrate the feature for numba_dpex.kernel and presents @@ -38,7 +39,7 @@ def data_parallel_sum(a, b, c): b = dpt.ones(1024, dtype=dpt.int64) c = dpt.zeros(1024, dtype=dpt.int64) -data_parallel_sum[1024](a, b, c) +data_parallel_sum[Range(1024)](a, b, c) npc = dpt.asnumpy(c) npc_expected = np.full(1024, 2, dtype=np.int64) @@ -65,7 +66,7 @@ def data_parallel_sum2(a, b, c): b = dpt.ones(1024, dtype=dpt.int64) c = dpt.zeros(1024, dtype=dpt.int64) -data_parallel_sum2[1024](a, b, c) +data_parallel_sum2[Range(1024)](a, b, c) npc = dpt.asnumpy(c) npc_expected = np.full(1024, 2, dtype=np.int64) @@ -76,7 +77,7 @@ def data_parallel_sum2(a, b, c): b = dpt.ones(1024, dtype=dpt.float32) c = dpt.zeros(1024, dtype=dpt.float32) -data_parallel_sum2[1024](a, b, c) +data_parallel_sum2[Range(1024)](a, b, c) npc = dpt.asnumpy(c) npc_expected = np.full(1024, 2, dtype=np.float32) @@ -94,7 +95,7 @@ def data_parallel_sum2(a, b, c): c = dpt.zeros(1024, dtype=dpt.int32) try: - data_parallel_sum[1024](a, b, c) + data_parallel_sum[Range(1024)](a, b, c) except MissingSpecializationError as mse: print(mse) diff --git a/numba_dpex/examples/kernel/matmul.py b/numba_dpex/examples/kernel/matmul.py index a40ccc207b..5fd8e44832 100644 --- a/numba_dpex/examples/kernel/matmul.py +++ b/numba_dpex/examples/kernel/matmul.py @@ -9,6 +9,7 @@ import numpy as np import numba_dpex as dpex +from numba_dpex.core.kernel_interface.utils import NdRange, Range @dpex.kernel @@ -30,13 +31,13 @@ def gemm(a, b, c): Y = 16 global_size = X, X -griddim = X, X -blockdim = Y, Y +griddim = Range(X, X) +blockdim = Range(Y, Y) def driver(a, b, c): # Invoke the kernel - gemm[griddim, blockdim](a, b, c) + gemm[NdRange(griddim, blockdim)](a, b, c) def main(): diff --git a/numba_dpex/examples/kernel/pairwise_distance.py b/numba_dpex/examples/kernel/pairwise_distance.py index 30d940a871..da4822c64f 100644 --- a/numba_dpex/examples/kernel/pairwise_distance.py +++ 
b/numba_dpex/examples/kernel/pairwise_distance.py @@ -12,6 +12,7 @@ import numpy as np import numba_dpex as dpex +from numba_dpex.core.kernel_interface.utils import NdRange, Range parser = argparse.ArgumentParser( description="Program to compute pairwise distance" @@ -25,9 +26,9 @@ args = parser.parse_args() # Global work size is equal to the number of points -global_size = (args.n,) +global_size = Range(args.n) # Local Work size is optional -local_size = (args.l,) +local_size = Range(args.l) X = np.random.random((args.n, args.d)).astype(np.single) D = np.empty((args.n, args.n), dtype=np.single) @@ -65,7 +66,7 @@ def driver(): for repeat in range(args.r): start = time() - pairwise_distance[global_size, local_size]( + pairwise_distance[NdRange(global_size, local_size)]( x_ndarray, d_ndarray, X.shape[0], X.shape[1] ) end = time() diff --git a/numba_dpex/examples/kernel/scan.py b/numba_dpex/examples/kernel/scan.py index 6ee4056fbb..13374bbf4b 100644 --- a/numba_dpex/examples/kernel/scan.py +++ b/numba_dpex/examples/kernel/scan.py @@ -7,6 +7,7 @@ import dpnp as np import numba_dpex as ndpx +from numba_dpex.core.kernel_interface.utils import Range # 1D array size N = 64 @@ -56,7 +57,7 @@ def main(): print("Using device ...") print(arr.device) - kernel_hillis_steele_scan[N](arr) + kernel_hillis_steele_scan[Range(N)](arr) # the output should be [0, 1, 3, 6, ...] 
arr_np = np.asnumpy(arr) diff --git a/numba_dpex/examples/kernel/select_device_for_kernel.py b/numba_dpex/examples/kernel/select_device_for_kernel.py index 7c08d7e9eb..fbe1f27bd1 100644 --- a/numba_dpex/examples/kernel/select_device_for_kernel.py +++ b/numba_dpex/examples/kernel/select_device_for_kernel.py @@ -9,6 +9,7 @@ import numpy as np import numba_dpex +from numba_dpex.core.kernel_interface.utils import NdRange, Range """ We support passing arrays of two types to a @numba_dpex.kernel decorated @@ -86,7 +87,7 @@ def select_device_ndarray(N): default_device = dpctl.select_default_device() with numba_dpex.offload_to_sycl_device(default_device.filter_string): - sum_kernel[(N,), (1,)](a, b, got) + sum_kernel[NdRange(Range(N), Range(1))](a, b, got) expected = a + b @@ -110,7 +111,7 @@ def select_device_SUAI(N): # Users don't need to specify where the computation will # take place. It will be inferred from data. - sum_kernel[(N,), (1,)](da, db, dc) + sum_kernel[NdRange(Range(N), Range(1))](da, db, dc) dc.usm_data.copy_to_host(got.reshape((-1)).view("|u1")) diff --git a/numba_dpex/examples/kernel/sum_reduction_ocl.py b/numba_dpex/examples/kernel/sum_reduction_ocl.py index 9ab19bdebd..03ddecdd0f 100644 --- a/numba_dpex/examples/kernel/sum_reduction_ocl.py +++ b/numba_dpex/examples/kernel/sum_reduction_ocl.py @@ -7,6 +7,7 @@ from numba import int32 import numba_dpex as dpex +from numba_dpex.core.kernel_interface.utils import NdRange, Range @dpex.kernel @@ -49,9 +50,9 @@ def sum_reduce(A): partial_sums = dpt.zeros(nb_work_groups, dtype=A.dtype, device=A.device) - gs = (global_size,) - ls = (work_group_size,) - sum_reduction_kernel[gs, ls](A, partial_sums) + gs = Range(global_size) + ls = Range(work_group_size) + sum_reduction_kernel[NdRange(gs, ls)](A, partial_sums) final_sum = 0 # calculate the final sum in HOST diff --git a/numba_dpex/examples/kernel/sum_reduction_recursive_ocl.py b/numba_dpex/examples/kernel/sum_reduction_recursive_ocl.py index 40183c5931..b90a985df0 
100644 --- a/numba_dpex/examples/kernel/sum_reduction_recursive_ocl.py +++ b/numba_dpex/examples/kernel/sum_reduction_recursive_ocl.py @@ -13,6 +13,7 @@ from numba import int32 import numba_dpex as dpex +from numba_dpex.core.kernel_interface.utils import NdRange, Range @dpex.kernel @@ -58,13 +59,15 @@ def sum_recursive_reduction(size, group_size, Dinp, Dpartial_sums): nb_work_groups += 1 passed_size = nb_work_groups * group_size - gr = (passed_size,) - lr = (group_size,) + gr = Range(passed_size) + lr = Range(group_size) - sum_reduction_kernel[gr, lr](Dinp, size, Dpartial_sums) + sum_reduction_kernel[NdRange(gr, lr)](Dinp, size, Dpartial_sums) if nb_work_groups <= group_size: - sum_reduction_kernel[lr, lr](Dpartial_sums, nb_work_groups, Dinp) + sum_reduction_kernel[NdRange(lr, lr)]( + Dpartial_sums, nb_work_groups, Dinp + ) result = int(Dinp[0]) else: result = sum_recursive_reduction( diff --git a/numba_dpex/examples/kernel/vector_sum.py b/numba_dpex/examples/kernel/vector_sum.py index cb1b9fa2bb..40ccc268ba 100644 --- a/numba_dpex/examples/kernel/vector_sum.py +++ b/numba_dpex/examples/kernel/vector_sum.py @@ -6,6 +6,7 @@ import numpy.testing as testing import numba_dpex as ndpx +from numba_dpex.core.kernel_interface.utils import Range # Data parallel kernel implementing vector sum @@ -18,7 +19,7 @@ def kernel_vector_sum(a, b, c): # Utility function for printing and testing def driver(a, b, c, global_size): - kernel_vector_sum[global_size](a, b, c) + kernel_vector_sum[Range(global_size)](a, b, c) a_np = dpnp.asnumpy(a) # Copy dpnp array a to NumPy array a_np b_np = dpnp.asnumpy(b) # Copy dpnp array b to NumPy array b_np diff --git a/numba_dpex/examples/kernel/vector_sum2D.py b/numba_dpex/examples/kernel/vector_sum2D.py index 4e38f0bc4b..5547698df8 100644 --- a/numba_dpex/examples/kernel/vector_sum2D.py +++ b/numba_dpex/examples/kernel/vector_sum2D.py @@ -23,19 +23,14 @@ def data_parallel_sum(a, b, c): def driver(a, b, c, global_size): - 
data_parallel_sum[global_size, dpex.DEFAULT_LOCAL_SIZE](a, b, c) - - -def driver_with_range(a, b, c, global_size): - ranges = Range(*global_size) - data_parallel_sum[ranges](a, b, c) + data_parallel_sum[global_size](a, b, c) def main(): # Array dimensions X = 8 Y = 8 - global_size = X, Y + global_size = Range(X, Y) a = np.arange(X * Y, dtype=np.float32).reshape(X, Y) b = np.arange(X * Y, dtype=np.float32).reshape(X, Y) @@ -59,11 +54,6 @@ def main(): c_out = dpt.asnumpy(c_dpt) assert np.allclose(c, c_out) - print("Running kernel with the new lanuch parameter syntax ...") - driver_with_range(a_dpt, b_dpt, c_dpt, global_size) - c_out = dpt.asnumpy(c_dpt) - assert np.allclose(c, c_out) - print("Done...") diff --git a/numba_dpex/examples/sum_reduction.py b/numba_dpex/examples/sum_reduction.py index cecafa5603..e7a47c65e7 100644 --- a/numba_dpex/examples/sum_reduction.py +++ b/numba_dpex/examples/sum_reduction.py @@ -8,6 +8,7 @@ import numpy as np import numba_dpex as dpex +from numba_dpex.core.kernel_interface.utils import Range @dpex.kernel @@ -34,7 +35,7 @@ def sum_reduce(A): with dpctl.device_context(device): while total > 1: global_size = total // 2 - sum_reduction_kernel[global_size](A, R, global_size) + sum_reduction_kernel[Range(global_size)](A, R, global_size) total = total // 2 return R[0]