
Commit cf4dc9d

Merge branch 'main' into memory-refactor
2 parents 7315e28 + 69aac67 commit cf4dc9d

7 files changed: +193 −47 lines changed

.github/workflows/test-wheel-linux.yml

Lines changed: 2 additions & 2 deletions
@@ -108,8 +108,8 @@ jobs:
         uses: ./.github/actions/install_unix_deps
         continue-on-error: false
         with:
-          # for artifact fetching
-          dependencies: "jq wget"
+          # for artifact fetching, graphics libs
+          dependencies: "jq wget libgl1 libegl1"
           dependent_exes: "jq wget"

       - name: Set environment variables
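
The added libgl1/libegl1 packages back the new GL interop test below: on a headless runner, the test only takes the EGL path if an EGL runtime is discoverable. A quick check of that discovery, as a sketch assuming a Debian-based runner with the packages installed:

import ctypes.util

# After `apt install libegl1 libgl1`, the dynamic loader can resolve the EGL runtime:
print(ctypes.util.find_library("EGL"))  # e.g. "libEGL.so.1"; None if missing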

cuda_bindings/setup.py

Lines changed: 1 addition & 1 deletion
@@ -390,7 +390,7 @@ def initialize_options(self):
         self.parallel = nthreads

     def build_extension(self, ext):
-        if building_wheel and sys.platform == "linux":
+        if building_wheel and sys.platform == "linux" and "--debug" not in sys.argv:
             # Strip binaries to remove debug symbols
             ext.extra_link_args.append("-Wl,--strip-all")
         super().build_extension(ext)
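
The net effect: release wheels on Linux are still stripped, while a developer passing `--debug` keeps symbols for debugging. A minimal sketch of the guard in isolation (the `extra_link_args` helper here is hypothetical, not part of the commit):

import sys

def extra_link_args(building_wheel):
    """Strip symbols only for non-debug Linux wheel builds (hypothetical helper)."""
    args = []
    if building_wheel and sys.platform == "linux" and "--debug" not in sys.argv:
        args.append("-Wl,--strip-all")  # linker flag that removes debug symbols
    return args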

cuda_bindings/tests/test_graphics_apis.py

Lines changed: 93 additions & 18 deletions
@@ -1,29 +1,104 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

+import contextlib
+import ctypes
+import ctypes.util
+import os
+import sys
+
 import pytest
 from cuda.bindings import runtime as cudart


-def test_graphics_api_smoketest():
-    # Due to lazy importing in pyglet, pytest.importorskip doesn't work
+@contextlib.contextmanager
+def _gl_context():
+    """
+    Yield a (tex_id, tex_target) with a current GL context.
+    Tries:
+      1) Windows: hidden WGL window (no EGL)
+      2) Linux with DISPLAY/wayland: hidden window
+      3) Linux headless: EGL headless if available
+    Skips if none work.
+    """
+    pyglet = pytest.importorskip("pyglet")
+
+    # Prefer non-headless when a display is available; it's more portable and avoids EGL.
+    if sys.platform.startswith("linux") and not (os.environ.get("DISPLAY") or os.environ.get("WAYLAND_DISPLAY")):
+        if ctypes.util.find_library("EGL") is None:
+            pytest.skip("No DISPLAY and no EGL runtime available for headless context.")
+        pyglet.options["headless"] = True
+
+    # Create a minimal offscreen/hidden context
+    win = None
     try:
-        import pyglet
-
-        tex = pyglet.image.Texture.create(512, 512)
-    except (ImportError, AttributeError):
-        pytest.skip("pyglet not available or could not create GL context")
-        # return to make linters happy
-        return
-
-    err, gfx_resource = cudart.cudaGraphicsGLRegisterImage(
-        tex.id, tex.target, cudart.cudaGraphicsRegisterFlags.cudaGraphicsRegisterFlagsWriteDiscard
-    )
-    error_name = cudart.cudaGetErrorName(err)[1].decode()
-    if error_name == "cudaSuccess":
-        assert int(gfx_resource) != 0
-    else:
-        assert error_name in ("cudaErrorInvalidValue", "cudaErrorUnknown")
+        if not pyglet.options.get("headless"):
+            # Hidden window path (WGL on Windows, GLX/WLS on Linux)
+            from pyglet import gl
+
+            config = gl.Config(double_buffer=False)
+            win = pyglet.window.Window(visible=False, config=config)
+            win.switch_to()
+        else:
+            # Headless EGL path; pyglet will arrange a pbuffer-like headless context
+            from pyglet.gl import headless  # noqa: F401 (import side-effect creates context)
+
+        # Make a tiny texture so we have a real GL object to register
+        from pyglet.gl import gl as _gl
+
+        tex_id = _gl.GLuint(0)
+        _gl.glGenTextures(1, ctypes.byref(tex_id))
+        target = _gl.GL_TEXTURE_2D
+        _gl.glBindTexture(target, tex_id.value)
+        _gl.glTexParameteri(target, _gl.GL_TEXTURE_MIN_FILTER, _gl.GL_NEAREST)
+        _gl.glTexParameteri(target, _gl.GL_TEXTURE_MAG_FILTER, _gl.GL_NEAREST)
+        width, height = 16, 16
+        _gl.glTexImage2D(target, 0, _gl.GL_RGBA8, width, height, 0, _gl.GL_RGBA, _gl.GL_UNSIGNED_BYTE, None)
+
+        yield int(tex_id.value), int(target)
+
+    except Exception as e:
+        # Convert any pyglet/GL creation failure into a clean skip
+        pytest.skip(f"Could not create GL context/texture: {type(e).__name__}: {e}")
+    finally:
+        # Best-effort cleanup
+        try:
+            from pyglet.gl import gl as _gl
+
+            if tex_id.value:
+                _gl.glDeleteTextures(1, ctypes.byref(tex_id))
+        except Exception:  # noqa: S110
+            pass
+        try:
+            if win is not None:
+                win.close()
+        except Exception:  # noqa: S110
+            pass
+
+
+@pytest.mark.parametrize(
+    "flags",
+    [
+        cudart.cudaGraphicsRegisterFlags.cudaGraphicsRegisterFlagsNone,
+        cudart.cudaGraphicsRegisterFlags.cudaGraphicsRegisterFlagsWriteDiscard,
+    ],
+)
+def test_cuda_gl_register_image_smoketest(flags):
+    with _gl_context() as (tex_id, tex_target):
+        # Register
+        err, resource = cudart.cudaGraphicsGLRegisterImage(tex_id, tex_target, flags)
+        name = cudart.cudaGetErrorName(err)[1].decode()
+
+        # Map error expectations by environment:
+        # - success: we actually exercised the API
+        # - operating-system: typical when the driver/runtime refuses interop (e.g., no GPU/driver in CI container)
+        acceptable = {"cudaSuccess", "cudaErrorOperatingSystem"}
+
+        assert name in acceptable, f"cudaGraphicsGLRegisterImage returned {name}"
+        if name == "cudaSuccess":
+            assert int(resource) != 0
+            # Unregister to be tidy
+            cudart.cudaGraphicsUnregisterResource(resource)


 def test_cuda_register_image_invalid():
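
The rewritten test routes every outcome through CUDA error names rather than raw enum values, which keeps the assertions readable across driver versions. A minimal sketch of that pattern, assuming `cuda.bindings` is installed (runtime calls return the status first, and `cudaGetErrorName` returns a (status, bytes) pair):

from cuda.bindings import runtime as cudart

err, count = cudart.cudaGetDeviceCount()
name = cudart.cudaGetErrorName(err)[1].decode()  # e.g. "cudaSuccess"
print(name, count)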
cuda_core/cuda/core/experimental/_launch_config.pxd

Lines changed: 19 additions & 0 deletions
@@ -0,0 +1,19 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+
+cdef bint _inited
+cdef bint _use_ex
+
+cdef void _lazy_init() except *
+
+cdef class LaunchConfig:
+    """Customizable launch options."""
+    cdef public tuple grid
+    cdef public tuple cluster
+    cdef public tuple block
+    cdef public int shmem_size
+    cdef public bint cooperative_launch
+
+cpdef object _to_native_launch_config(LaunchConfig config)
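
Moving the attribute declarations into the .pxd gives them C-level storage, while `cdef public` keeps them readable and writable from ordinary Python. A quick sketch of the unchanged user-facing behavior, assuming cuda.core is built from this branch and a CUDA driver is available:

from cuda.core.experimental import LaunchConfig

cfg = LaunchConfig(grid=4, block=256)       # ints are cast to 3-tuples on init
print(cfg.grid, cfg.block, cfg.shmem_size)  # (4, 1, 1) (256, 1, 1) 0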

cuda_core/cuda/core/experimental/_launch_config.py renamed to cuda_core/cuda/core/experimental/_launch_config.pyx

Lines changed: 76 additions & 24 deletions
@@ -2,9 +2,6 @@
 #
 # SPDX-License-Identifier: Apache-2.0

-from dataclasses import dataclass
-from typing import Union
-
 from cuda.core.experimental._device import Device
 from cuda.core.experimental._utils.cuda_utils import (
     CUDAError,
@@ -15,24 +12,27 @@
 )

 # TODO: revisit this treatment for py313t builds
-_inited = False
+cdef bint _inited = False
+cdef bint _use_ex = False


-def _lazy_init():
-    global _inited
+cdef void _lazy_init() except *:
+    """Initialize module-level globals for driver version checks."""
+    global _inited, _use_ex
     if _inited:
         return

-    global _use_ex
+    cdef tuple _py_major_minor
+    cdef int _driver_ver
+
     # binding availability depends on cuda-python version
     _py_major_minor = get_binding_version()
     _driver_ver = handle_return(driver.cuDriverGetVersion())
     _use_ex = (_driver_ver >= 11080) and (_py_major_minor >= (11, 8))
     _inited = True


-@dataclass
-class LaunchConfig:
+cdef class LaunchConfig:
     """Customizable launch options.

     Note
@@ -65,21 +65,36 @@ class LaunchConfig:
     """

     # TODO: expand LaunchConfig to include other attributes
-    grid: Union[tuple, int] = None
-    cluster: Union[tuple, int] = None
-    block: Union[tuple, int] = None
-    shmem_size: int | None = None
-    cooperative_launch: bool | None = False
-
-    def __post_init__(self):
+    # Note: attributes are declared in _launch_config.pxd
+
+    def __init__(self, grid=None, cluster=None, block=None,
+                 shmem_size=None, cooperative_launch=False):
+        """Initialize LaunchConfig with validation.
+
+        Parameters
+        ----------
+        grid : Union[tuple, int], optional
+            Grid dimensions (number of blocks or clusters if cluster is specified)
+        cluster : Union[tuple, int], optional
+            Cluster dimensions (Thread Block Cluster)
+        block : Union[tuple, int], optional
+            Block dimensions (threads per block)
+        shmem_size : int, optional
+            Dynamic shared memory size in bytes (default: 0)
+        cooperative_launch : bool, optional
+            Whether to launch as cooperative kernel (default: False)
+        """
         _lazy_init()
-        self.grid = cast_to_3_tuple("LaunchConfig.grid", self.grid)
-        self.block = cast_to_3_tuple("LaunchConfig.block", self.block)
+
+        # Convert and validate grid and block dimensions
+        self.grid = cast_to_3_tuple("LaunchConfig.grid", grid)
+        self.block = cast_to_3_tuple("LaunchConfig.block", block)
+
         # FIXME: Calling Device() strictly speaking is not quite right; we should instead
         # look up the device from stream. We probably need to defer the checks related to
         # device compute capability or attributes.
         # thread block clusters are supported starting H100
-        if self.cluster is not None:
+        if cluster is not None:
             if not _use_ex:
                 err, drvers = driver.cuDriverGetVersion()
                 drvers_fmt = f" (got driver version {drvers})" if err == driver.CUresult.CUDA_SUCCESS else ""
@@ -89,19 +104,53 @@ def __post_init__(self):
                 raise CUDAError(
                     f"thread block clusters are not supported on devices with compute capability < 9.0 (got {cc})"
                 )
-            self.cluster = cast_to_3_tuple("LaunchConfig.cluster", self.cluster)
-        if self.shmem_size is None:
+            self.cluster = cast_to_3_tuple("LaunchConfig.cluster", cluster)
+        else:
+            self.cluster = None
+
+        # Handle shmem_size default
+        if shmem_size is None:
             self.shmem_size = 0
+        else:
+            self.shmem_size = shmem_size
+
+        # Handle cooperative_launch
+        self.cooperative_launch = cooperative_launch
+
+        # Validate cooperative launch support
         if self.cooperative_launch and not Device().properties.cooperative_launch:
             raise CUDAError("cooperative kernels are not supported on this device")

+    def __repr__(self):
+        """Return string representation of LaunchConfig."""
+        return (f"LaunchConfig(grid={self.grid}, cluster={self.cluster}, "
+                f"block={self.block}, shmem_size={self.shmem_size}, "
+                f"cooperative_launch={self.cooperative_launch})")
+

-def _to_native_launch_config(config: LaunchConfig) -> driver.CUlaunchConfig:
+cpdef object _to_native_launch_config(LaunchConfig config):
+    """Convert LaunchConfig to native driver CUlaunchConfig.
+
+    Parameters
+    ----------
+    config : LaunchConfig
+        High-level launch configuration
+
+    Returns
+    -------
+    driver.CUlaunchConfig
+        Native CUDA driver launch configuration
+    """
     _lazy_init()
-    drv_cfg = driver.CUlaunchConfig()
+
+    cdef object drv_cfg = driver.CUlaunchConfig()
+    cdef list attrs
+    cdef object attr
+    cdef object dim
+    cdef tuple grid_blocks

     # Handle grid dimensions and cluster configuration
-    if config.cluster:
+    if config.cluster is not None:
         # Convert grid from cluster units to block units
         grid_blocks = (
             config.grid[0] * config.cluster[0],
@@ -122,11 +171,14 @@ def _to_native_launch_config(config: LaunchConfig) -> driver.CUlaunchConfig:

     drv_cfg.blockDimX, drv_cfg.blockDimY, drv_cfg.blockDimZ = config.block
     drv_cfg.sharedMemBytes = config.shmem_size
+
     if config.cooperative_launch:
         attr = driver.CUlaunchAttribute()
         attr.id = driver.CUlaunchAttributeID.CU_LAUNCH_ATTRIBUTE_COOPERATIVE
         attr.value.cooperative = 1
         attrs.append(attr)
+
     drv_cfg.numAttrs = len(attrs)
     drv_cfg.attrs = attrs
+
     return drv_cfg
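
One detail worth noting in `_to_native_launch_config`: when a cluster shape is set, `LaunchConfig.grid` counts clusters, while the driver expects block counts, so the grid is rescaled element-wise. A sketch of that arithmetic (plain Python; `grid_in_blocks` is a hypothetical name, not part of the commit):

def grid_in_blocks(grid, cluster):
    """Convert a grid given in cluster units to block units (element-wise product)."""
    if cluster is None:
        return grid
    return tuple(g * c for g, c in zip(grid, cluster))

assert grid_in_blocks((2, 2, 1), (2, 1, 1)) == (4, 2, 1)  # 2x2 clusters of 2x1 blocks each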

cuda_core/cuda/core/experimental/_launcher.pyx

Lines changed: 1 addition & 1 deletion
@@ -9,7 +9,7 @@ from cuda.core.experimental._stream cimport _try_to_get_stream_ptr
 from typing import Union

 from cuda.core.experimental._kernel_arg_handler import ParamHolder
-from cuda.core.experimental._launch_config import LaunchConfig, _to_native_launch_config
+from cuda.core.experimental._launch_config cimport LaunchConfig, _to_native_launch_config
 from cuda.core.experimental._module import Kernel
 from cuda.core.experimental._stream import IsStreamT, Stream
 from cuda.core.experimental._utils.clear_error_support import assert_type

cuda_core/tests/test_launcher.py

Lines changed: 1 addition & 1 deletion
@@ -131,7 +131,7 @@ def test_launch_invalid_values(init_cuda):
     ker = mod.get_kernel("my_kernel")
     config = LaunchConfig(grid=(1, 1, 1), block=(1, 1, 1), shmem_size=0)

-    with pytest.raises(ValueError):
+    with pytest.raises(TypeError):
         launch(None, ker, config)

     with pytest.raises(TypeError):
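
With `LaunchConfig` now a cimported cdef class, passing a non-stream first argument to `launch()` surfaces as a TypeError rather than a ValueError, presumably because the Cythonized launch path type-checks its arguments earlier. A sketch of the asserted behavior, assuming a loaded kernel `ker` and the `config` above:

import pytest
from cuda.core.experimental import launch

with pytest.raises(TypeError):
    launch(None, ker, config)  # stream=None now fails type checking, not value checking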
