Skip to content

Commit 2e90f05

Browse files
Merge update_cuda_build into update_amd_build
2 parents 3bfba7e + 155ca52 commit 2e90f05

File tree

4 files changed

+40
-12
lines changed

4 files changed

+40
-12
lines changed

CMakeLists.txt

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,9 @@ option(DPCTL_GENERATE_COVERAGE_FOR_PYBIND11_EXTENSIONS
2828
# Cache option selecting the CUDA target for the DPCTL build.
#   ""            - (default) CUDA targeting is not requested.
#   truthy value  - use the default architecture (sm_50).
#   sm_XX         - build for that specific architecture, e.g. sm_80.
#
# NOTE: the help text must be passed as ONE argument. set() only recognises
# the cache signature when CACHE is followed by exactly <type> <docstring>;
# with several docstring arguments the call silently degrades to a normal
# variable assignment of the list ";CACHE;STRING;...". The backslash line
# continuations below keep the text a single quoted argument.
set(DPCTL_TARGET_CUDA
    ""
    CACHE STRING
    "Build DPCTL to target CUDA device. \
Set to a truthy value (e.g., ON, TRUE) to use default architecture (sm_50), \
or to a specific architecture like sm_80."
)
3335
set(DPCTL_TARGET_HIP
3436
""
@@ -55,14 +57,14 @@ set(_dpctl_sycl_targets)
5557
set(_dpctl_cuda_arch)
5658
if ("x${DPCTL_SYCL_TARGETS}" STREQUAL "x")
5759
if (NOT "x${DPCTL_TARGET_CUDA}" STREQUAL "x")
58-
if (DPCTL_TARGET_CUDA STREQUAL "ON")
59-
set(_dpctl_cuda_arch "sm_50")
60-
elseif(DPCTL_TARGET_CUDA MATCHES "^sm_")
60+
if(DPCTL_TARGET_CUDA MATCHES "^sm_")
6161
set(_dpctl_cuda_arch ${DPCTL_TARGET_CUDA})
62+
elseif(DPCTL_TARGET_CUDA MATCHES "^(ON|TRUE|YES|Y|1)$")
63+
set(_dpctl_cuda_arch "sm_50")
6264
else()
6365
message(FATAL_ERROR
6466
"Invalid value for DPCTL_TARGET_CUDA: \"${DPCTL_TARGET_CUDA}\". "
65-
"Expected 'ON' or an architecture like 'sm_80'."
67+
"Expected 'ON', 'TRUE', 'YES', 'Y', '1', or a CUDA architecture like 'sm_80'."
6668
)
6769
endif()
6870
set(_dpctl_sycl_targets "nvidia_gpu_${_dpctl_cuda_arch},spir64-unknown-unknown")

docs/doc_sources/beginners_guides/installation.rst

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,8 @@ which accepts a specific compute architecture string:
166166
167167
python scripts/build_locally.py --verbose --cmake-opts="-DDPCTL_TARGET_CUDA=sm_80"
168168
169-
To use the default architecture (``sm_50``), use:
169+
To use the default architecture (``sm_50``),
170+
set ``DPCTL_TARGET_CUDA`` to a value such as ``ON``, ``TRUE``, ``YES``, ``Y``, or ``1``:
170171

171172
.. code-block:: bash
172173
@@ -175,10 +176,11 @@ To use the default architecture (``sm_50``), use:
175176
Note that kernels are built for ``sm_50`` by default, allowing them to work on a wider
176177
range of architectures, but limiting the usage of more recent CUDA features.
177178

178-
For reference, compute architecture strings like ``sm_80`` are based on
179-
CUDA Compute Capability. A complete mapping between NVIDIA GPU models and their
180-
respective ``sm_XX`` values can be found in the official
181-
`CUDA GPU Compute Capability <https://developer.nvidia.com/cuda-gpus>`_.
179+
For reference, compute architecture strings like ``sm_80`` correspond to specific
180+
CUDA Compute Capabilities (e.g., Compute Capability 8.0 corresponds to ``sm_80``).
181+
A complete mapping between NVIDIA GPU models and their respective
182+
Compute Capabilities can be found in the official
183+
`CUDA GPU Compute Capability <https://developer.nvidia.com/cuda-gpus>`_ documentation.
182184

183185
A full list of available SYCL alias targets is available in the
184186
`DPC++ Compiler User Manual <https://intel.github.io/llvm/UsersManual.html>`_.

dpctl/tensor/_ctors.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,8 +111,6 @@ def _asarray_from_usm_ndarray(
111111
raise TypeError(
112112
f"Expected dpctl.tensor.usm_ndarray, got {type(usm_ndary)}"
113113
)
114-
if dtype is None:
115-
dtype = usm_ndary.dtype
116114
if usm_type is None:
117115
usm_type = usm_ndary.usm_type
118116
if sycl_queue is not None:
@@ -122,6 +120,8 @@ def _asarray_from_usm_ndarray(
122120
copy_q = normalize_queue_device(sycl_queue=sycl_queue, device=exec_q)
123121
else:
124122
copy_q = usm_ndary.sycl_queue
123+
if dtype is None:
124+
dtype = _map_to_device_dtype(usm_ndary.dtype, copy_q)
125125
# Conditions for zero copy:
126126
can_zero_copy = copy is not True
127127
# dtype is unchanged

dpctl/tests/test_tensor_asarray.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -623,3 +623,27 @@ def test_asarray_support_for_usm_ndarray_protocol(usm_type):
623623
assert x.dtype == y3.dtype
624624
assert y3.usm_data.reference_obj is None
625625
assert dpt.all(x[dpt.newaxis, :] == y3)
626+
627+
628+
@pytest.mark.parametrize("dt", [dpt.float16, dpt.float64, dpt.complex128])
def test_asarray_to_device_with_unsupported_dtype(dt):
    """Check that ``asarray`` can move an array to a device lacking its dtype.

    An array of dtype ``dt`` is allocated on a device that has the required
    aspect ("fp16" for float16, "fp64" otherwise) and then passed to
    ``dpt.asarray`` targeting a device selected with that aspect excluded.
    The test is skipped when either kind of device is unavailable.
    """
    aspect = "fp16" if dt == dpt.float16 else "fp64"
    try:
        d0 = dpctl.select_device_with_aspects(aspect)
    except dpctl.SyclDeviceCreationError:
        pytest.skip("No device with aspect for test")
    d1 = None
    for d in dpctl.get_devices():
        if d.default_selector_score < 0:
            # Skip devices with a negative selector score; the original
            # `pass` here was a no-op, so these devices were still probed.
            continue
        try:
            # The device type name is passed as a required aspect
            # (presumably "cpu"/"gpu"/... are valid aspect names - confirm).
            d1 = dpctl.select_device_with_aspects(
                d.device_type.name, excluded_aspects=[aspect]
            )
        except dpctl.SyclDeviceCreationError:
            # No device of this type lacks the aspect; try the next one.
            pass
    if d1 is None:
        pytest.skip("No device with missing aspect for test")
    x = dpt.ones(10, dtype=dt, device=d0)
    y = dpt.asarray(x, device=d1)
    assert y.sycl_device == d1

0 commit comments

Comments
 (0)