Skip to content

Add intel device info data #1428

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Oct 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions dpctl/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ foreach(_cy_file ${_cython_sources})
build_dpctl_ext(${_trgt} ${_cy_file} "dpctl")
endforeach()

# _sycl_queue include _host_task_util.hpp
target_include_directories(_sycl_queue PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})

add_subdirectory(program)
Expand Down
27 changes: 27 additions & 0 deletions dpctl/tests/test_tensor_sum.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import pytest

import dpctl.tensor as dpt
import dpctl.utils as du
from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported

_all_dtypes = [
Expand Down Expand Up @@ -187,11 +188,28 @@ def test_axis0_bug():
assert dpt.all(s == expected)


def _any_complex(dtypes):
return any(dpt.isdtype(dpt.dtype(dt), "complex floating") for dt in dtypes)


def _skip_on_this_device(sycl_dev):
device_mask = du.intel_device_info(sycl_dev).get("device_id", 0) & 0xFF00
return device_mask in [0x3E00, 0x9B00]


@pytest.mark.parametrize("arg_dtype", _all_dtypes[1:])
def test_prod_arg_dtype_default_output_dtype_matrix(arg_dtype):
q = get_queue_or_skip()
skip_if_dtype_not_supported(arg_dtype, q)

arg_dtype = dpt.dtype(arg_dtype)
if _any_complex((arg_dtype,)):
if _skip_on_this_device(q.sycl_device):
pytest.skip(
"Product reduction for complex output are known "
"to fail for Gen9 with 2024.0 compiler"
)

m = dpt.ones(100, dtype=arg_dtype)
r = dpt.prod(m)

Expand Down Expand Up @@ -242,6 +260,15 @@ def test_prod_arg_out_dtype_matrix(arg_dtype, out_dtype):
skip_if_dtype_not_supported(arg_dtype, q)
skip_if_dtype_not_supported(out_dtype, q)

out_dtype = dpt.dtype(out_dtype)
arg_dtype = dpt.dtype(arg_dtype)
if _any_complex((arg_dtype, out_dtype)):
if _skip_on_this_device(q.sycl_device):
pytest.skip(
"Product reduction for complex output are known "
"to fail for Gen9 with 2024.0 compiler"
)

m = dpt.ones(100, dtype=arg_dtype)
r = dpt.prod(m, dtype=out_dtype)

Expand Down
27 changes: 27 additions & 0 deletions dpctl/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import dpctl
import dpctl.utils
from dpctl.enum_types import backend_type


def test_get_execution_queue_input_validation():
Expand Down Expand Up @@ -122,3 +123,29 @@ def test_onetrace_enabled():
with dpctl.utils.onetrace_enabled():
assert os.getenv(v_name, None) == "1"
assert os.getenv(v_name, None) == v_v


def test_intel_device_info():
try:
d = dpctl.select_default_device()
except dpctl.SyclDeviceCreationError:
pytest.skip("Default device could not be created")
descr = dpctl.utils.intel_device_info(d)
assert isinstance(descr, dict)
assert ("device_id" in descr) or (
not d.has_aspect_cpu and not d.backend == backend_type.level_zero
)
allowed_names = [
"device_id",
"gpu_slices",
"gpu_eu_count",
"gpu_eu_simd_width",
"gpu_hw_threads_per_eu",
"gpu_subslices_per_slice",
"gpu_eu_count_per_subslice",
"max_mem_bandwidth",
]
for descriptor_name in descr.keys():
test = descriptor_name in allowed_names
err_msg = f"Key '{descriptor_name}' is not recognized"
assert test, err_msg
21 changes: 21 additions & 0 deletions dpctl/utils/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,24 @@ foreach(_cy_file ${_cython_sources})
get_filename_component(_trgt ${_cy_file} NAME_WLE)
build_dpctl_ext(${_trgt} ${_cy_file} "dpctl/utils")
endforeach()

add_custom_target(_dpctl4pybind11_header_ready
DEPENDS
_usmarray_copy_capi_include
_memory_copy_capi_include
_sycl_device_copy_capi_include
_sycl_queue_copy_capi_include
_sycl_context_copy_capi_include
_sycl_event_copy_capi_include
)

set(python_module_name _device_queries)
pybind11_add_module(${python_module_name} MODULE
${CMAKE_CURRENT_SOURCE_DIR}/src/device_queries.cpp
)
target_include_directories(${python_module_name}
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/../include
)
add_dependencies(${python_module_name} _dpctl4pybind11_header_ready)
install(TARGETS ${python_module_name} DESTINATION "dpctl/utils")
67 changes: 67 additions & 0 deletions dpctl/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,85 @@
A collection of utility functions.
"""

from .._sycl_device import SyclDevice
from ._compute_follows_data import (
ExecutionPlacementError,
get_coerced_usm_type,
get_execution_queue,
validate_usm_type,
)
from ._device_queries import (
intel_device_info_device_id,
intel_device_info_gpu_eu_count,
intel_device_info_gpu_eu_count_per_subslice,
intel_device_info_gpu_eu_simd_width,
intel_device_info_gpu_hw_threads_per_eu,
intel_device_info_gpu_slices,
intel_device_info_gpu_subslices_per_slice,
intel_device_info_max_mem_bandwidth,
)
from ._onetrace_context import onetrace_enabled


def intel_device_info(dev):
"""intel_device_info(sycl_device)

For Intel(R) GPU devices returns a dictionary
with device architectural details, and an empty
dictionary otherwise. The dictionary contains
the following keys:

device_id: 32-bits device PCI identifier
gpu_eu_count: Total number of execution units
gpu_hw_threads_per_eu: Number of thread contexts in EU
gpu_eu_simd_width: Physical SIMD width of EU
gpu_slices: Total number of slices
gpu_subslices_per_slice: Number of sub-slices per slice
gpu_eu_count_per_subslice: Number of EUs in subslice
max_mem_bandwidth: Maximum memory bandwidth in bytes/second

Unsupported descriptors are omitted from the dictionary.
Descriptors other than PCI identifier are supported only for
SyclDevices with Leve-Zero backend.
"""
if not isinstance(dev, SyclDevice):
raise TypeError(f"Expected dpctl.SyclDevice, got {type(dev)}")
dev_id = intel_device_info_device_id(dev)
if dev_id:
res = {
"device_id": dev_id,
}
if dev.has_aspect_gpu:
eu_count = intel_device_info_gpu_eu_count(dev)
if eu_count:
res["gpu_eu_count"] = eu_count
hw_threads = intel_device_info_gpu_hw_threads_per_eu(dev)
if hw_threads:
res["gpu_hw_threads_per_eu"] = hw_threads
simd_w = intel_device_info_gpu_eu_simd_width(dev)
if simd_w:
res["gpu_eu_simd_width"] = simd_w
n_slices = intel_device_info_gpu_slices(dev)
if n_slices:
res["gpu_slices"] = n_slices
n_subslices = intel_device_info_gpu_subslices_per_slice(dev)
if n_subslices:
res["gpu_subslices_per_slice"] = n_subslices
n_eu_per_subslice = intel_device_info_gpu_eu_count_per_subslice(dev)
if n_eu_per_subslice:
res["gpu_eu_count_per_subslice"] = n_eu_per_subslice
bw = intel_device_info_max_mem_bandwidth(dev)
if bw:
res["max_mem_bandwidth"] = bw
return res
return dict()


__all__ = [
"get_execution_queue",
"get_coerced_usm_type",
"validate_usm_type",
"onetrace_enabled",
"intel_device_info",
"ExecutionPlacementError",
]
139 changes: 139 additions & 0 deletions dpctl/utils/src/device_queries.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
#include "dpctl4pybind11.hpp"
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <sycl/sycl.hpp>

#include <cstddef>
#include <cstdint>

namespace
{

std::uint32_t py_intel_device_id(const sycl::device &d)
{
static constexpr std::uint32_t device_id_unavailable = 0;

if (d.has(sycl::aspect::ext_intel_device_id)) {
return d.get_info<sycl::ext::intel::info::device::device_id>();
}

return device_id_unavailable;
}

std::uint32_t py_intel_gpu_eu_count(const sycl::device &d)
{
static constexpr std::uint32_t eu_count_unavailable = 0;

if (d.has(sycl::aspect::ext_intel_gpu_eu_count)) {
return d.get_info<sycl::ext::intel::info::device::gpu_eu_count>();
}

return eu_count_unavailable;
}

std::uint32_t py_intel_gpu_hw_threads_per_eu(const sycl::device &d)
{
static constexpr std::uint32_t thread_count_unavailable = 0;

if (d.has(sycl::aspect::ext_intel_gpu_hw_threads_per_eu)) {
return d
.get_info<sycl::ext::intel::info::device::gpu_hw_threads_per_eu>();
}

return thread_count_unavailable;
}

std::uint32_t py_intel_gpu_eu_simd_width(const sycl::device &d)
{
static constexpr std::uint32_t width_unavailable = 0;

if (d.has(sycl::aspect::ext_intel_gpu_eu_simd_width)) {
return d.get_info<sycl::ext::intel::info::device::gpu_eu_simd_width>();
}

return width_unavailable;
}

std::uint32_t py_intel_gpu_slices(const sycl::device &d)
{
static constexpr std::uint32_t count_unavailable = 0;

if (d.has(sycl::aspect::ext_intel_gpu_slices)) {
return d.get_info<sycl::ext::intel::info::device::gpu_slices>();
}

return count_unavailable;
}

std::uint32_t py_intel_gpu_subslices_per_slice(const sycl::device &d)
{
static constexpr std::uint32_t count_unavailable = 0;

if (d.has(sycl::aspect::ext_intel_gpu_subslices_per_slice)) {
return d.get_info<
sycl::ext::intel::info::device::gpu_subslices_per_slice>();
}

return count_unavailable;
}

std::uint32_t py_intel_gpu_eu_count_per_subslice(const sycl::device &d)
{
static constexpr std::uint32_t count_unavailable = 0;

if (d.has(sycl::aspect::ext_intel_gpu_eu_count_per_subslice)) {
return d.get_info<
sycl::ext::intel::info::device::gpu_eu_count_per_subslice>();
}

return count_unavailable;
}

std::uint64_t py_intel_max_mem_bandwidth(const sycl::device &d)
{
static constexpr std::uint64_t bandwidth_unavailable = 0;

if (d.has(sycl::aspect::ext_intel_max_mem_bandwidth)) {
return d.get_info<sycl::ext::intel::info::device::max_mem_bandwidth>();
}

return bandwidth_unavailable;
}

}; // namespace

PYBIND11_MODULE(_device_queries, m)
{
m.def("intel_device_info_device_id", &py_intel_device_id,
"Get ext_intel_device_id for the device, zero if not an intel device",
py::arg("device"));

m.def("intel_device_info_gpu_eu_count", &py_intel_gpu_eu_count,
"Returns the number of execution units (EUs) associated with the "
"Intel GPU.",
py::arg("device"));

m.def("intel_device_info_gpu_hw_threads_per_eu",
&py_intel_gpu_hw_threads_per_eu,
"Returns the number of hardware threads in EU.", py::arg("device"));

m.def("intel_device_info_gpu_eu_simd_width", &py_intel_gpu_eu_simd_width,
"Returns the physical SIMD width of the execution unit (EU).",
py::arg("device"));

m.def("intel_device_info_gpu_slices", &py_intel_gpu_slices,
"Returns the number of slices in the GPU device, or zero.",
py::arg("device"));

m.def("intel_device_info_gpu_subslices_per_slice",
&py_intel_gpu_subslices_per_slice,
"Returns the number of subslices per slice.", py::arg("device"));

m.def("intel_device_info_gpu_eu_count_per_subslice",
&py_intel_gpu_eu_count_per_subslice,
"Returns the number of EUs per subslice of GPU.", py::arg("device"));

m.def("intel_device_info_max_mem_bandwidth", &py_intel_max_mem_bandwidth,
"Returns the maximum memory bandwidth in units of bytes/second.",
py::arg("device"));
}