From 24803891a3f4d18ff74a1896ae307d45562d5d45 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Wed, 12 Feb 2025 12:27:46 -0800 Subject: [PATCH 01/69] Unmodified copies of: * https://github.com/NVIDIA/numba-cuda/blob/bf487d78a40eea87f009d636882a5000a7524c95/numba_cuda/numba/cuda/cuda_paths.py * https://github.com/numba/numba/blob/f0d24824fcd6a454827e3c108882395d00befc04/numba/misc/findlib.py --- .../cuda/bindings/ecosystem/cuda_paths.py | 328 ++++++++++++++++++ .../cuda/bindings/ecosystem/findlib.py | 63 ++++ 2 files changed, 391 insertions(+) create mode 100644 cuda_bindings/cuda/bindings/ecosystem/cuda_paths.py create mode 100644 cuda_bindings/cuda/bindings/ecosystem/findlib.py diff --git a/cuda_bindings/cuda/bindings/ecosystem/cuda_paths.py b/cuda_bindings/cuda/bindings/ecosystem/cuda_paths.py new file mode 100644 index 000000000..4290a0a95 --- /dev/null +++ b/cuda_bindings/cuda/bindings/ecosystem/cuda_paths.py @@ -0,0 +1,328 @@ +import sys +import re +import os +from collections import namedtuple +import platform + +from numba.core.config import IS_WIN32 +from numba.misc.findlib import find_lib, find_file +from numba import config + + +_env_path_tuple = namedtuple('_env_path_tuple', ['by', 'info']) + + +def _find_valid_path(options): + """Find valid path from *options*, which is a list of 2-tuple of + (name, path). Return first pair where *path* is not None. 
+ If no valid path is found, return ('', None) + """ + for by, data in options: + if data is not None: + return by, data + else: + return '', None + + +def _get_libdevice_path_decision(): + options = [ + ('Conda environment', get_conda_ctk()), + ('Conda environment (NVIDIA package)', get_nvidia_libdevice_ctk()), + ('CUDA_HOME', get_cuda_home('nvvm', 'libdevice')), + ('System', get_system_ctk('nvvm', 'libdevice')), + ('Debian package', get_debian_pkg_libdevice()), + ] + by, libdir = _find_valid_path(options) + return by, libdir + + +def _nvvm_lib_dir(): + if IS_WIN32: + return 'nvvm', 'bin' + else: + return 'nvvm', 'lib64' + + +def _get_nvvm_path_decision(): + options = [ + ('Conda environment', get_conda_ctk()), + ('Conda environment (NVIDIA package)', get_nvidia_nvvm_ctk()), + ('CUDA_HOME', get_cuda_home(*_nvvm_lib_dir())), + ('System', get_system_ctk(*_nvvm_lib_dir())), + ] + by, path = _find_valid_path(options) + return by, path + + +def _get_libdevice_paths(): + by, libdir = _get_libdevice_path_decision() + # Search for pattern + pat = r'libdevice(\.\d+)*\.bc$' + candidates = find_file(re.compile(pat), libdir) + # Keep only the max (most recent version) of the bitcode files. 
+ out = max(candidates, default=None) + return _env_path_tuple(by, out) + + +def _cudalib_path(): + if IS_WIN32: + return 'bin' + else: + return 'lib64' + + +def _cuda_home_static_cudalib_path(): + if IS_WIN32: + return ('lib', 'x64') + else: + return ('lib64',) + + +def _get_cudalib_dir_path_decision(): + options = [ + ('Conda environment', get_conda_ctk()), + ('Conda environment (NVIDIA package)', get_nvidia_cudalib_ctk()), + ('CUDA_HOME', get_cuda_home(_cudalib_path())), + ('System', get_system_ctk(_cudalib_path())), + ] + by, libdir = _find_valid_path(options) + return by, libdir + + +def _get_static_cudalib_dir_path_decision(): + options = [ + ('Conda environment', get_conda_ctk()), + ('Conda environment (NVIDIA package)', get_nvidia_static_cudalib_ctk()), + ('CUDA_HOME', get_cuda_home(*_cuda_home_static_cudalib_path())), + ('System', get_system_ctk(_cudalib_path())), + ] + by, libdir = _find_valid_path(options) + return by, libdir + + +def _get_cudalib_dir(): + by, libdir = _get_cudalib_dir_path_decision() + return _env_path_tuple(by, libdir) + + +def _get_static_cudalib_dir(): + by, libdir = _get_static_cudalib_dir_path_decision() + return _env_path_tuple(by, libdir) + + +def get_system_ctk(*subdirs): + """Return path to system-wide cudatoolkit; or, None if it doesn't exist. + """ + # Linux? + if sys.platform.startswith('linux'): + # Is cuda alias to /usr/local/cuda? + # We are intentionally not getting versioned cuda installation. + base = '/usr/local/cuda' + if os.path.exists(base): + return os.path.join(base, *subdirs) + + +def get_conda_ctk(): + """Return path to directory containing the shared libraries of cudatoolkit. 
+ """ + is_conda_env = os.path.exists(os.path.join(sys.prefix, 'conda-meta')) + if not is_conda_env: + return + # Assume the existence of NVVM to imply cudatoolkit installed + paths = find_lib('nvvm') + if not paths: + return + # Use the directory name of the max path + return os.path.dirname(max(paths)) + + +def get_nvidia_nvvm_ctk(): + """Return path to directory containing the NVVM shared library. + """ + is_conda_env = os.path.exists(os.path.join(sys.prefix, 'conda-meta')) + if not is_conda_env: + return + + # Assume the existence of NVVM in the conda env implies that a CUDA toolkit + # conda package is installed. + + # First, try the location used on Linux and the Windows 11.x packages + libdir = os.path.join(sys.prefix, 'nvvm', _cudalib_path()) + if not os.path.exists(libdir) or not os.path.isdir(libdir): + # If that fails, try the location used for Windows 12.x packages + libdir = os.path.join(sys.prefix, 'Library', 'nvvm', _cudalib_path()) + if not os.path.exists(libdir) or not os.path.isdir(libdir): + # If that doesn't exist either, assume we don't have the NVIDIA + # conda package + return + + paths = find_lib('nvvm', libdir=libdir) + if not paths: + return + # Use the directory name of the max path + return os.path.dirname(max(paths)) + + +def get_nvidia_libdevice_ctk(): + """Return path to directory containing the libdevice library. + """ + nvvm_ctk = get_nvidia_nvvm_ctk() + if not nvvm_ctk: + return + nvvm_dir = os.path.dirname(nvvm_ctk) + return os.path.join(nvvm_dir, 'libdevice') + + +def get_nvidia_cudalib_ctk(): + """Return path to directory containing the shared libraries of cudatoolkit. + """ + nvvm_ctk = get_nvidia_nvvm_ctk() + if not nvvm_ctk: + return + env_dir = os.path.dirname(os.path.dirname(nvvm_ctk)) + subdir = 'bin' if IS_WIN32 else 'lib' + return os.path.join(env_dir, subdir) + + +def get_nvidia_static_cudalib_ctk(): + """Return path to directory containing the static libraries of cudatoolkit. 
+ """ + nvvm_ctk = get_nvidia_nvvm_ctk() + if not nvvm_ctk: + return + + if IS_WIN32 and ("Library" not in nvvm_ctk): + # Location specific to CUDA 11.x packages on Windows + dirs = ('Lib', 'x64') + else: + # Linux, or Windows with CUDA 12.x packages + dirs = ('lib',) + + env_dir = os.path.dirname(os.path.dirname(nvvm_ctk)) + return os.path.join(env_dir, *dirs) + + +def get_cuda_home(*subdirs): + """Get paths of CUDA_HOME. + If *subdirs* are the subdirectory name to be appended in the resulting + path. + """ + cuda_home = os.environ.get('CUDA_HOME') + if cuda_home is None: + # Try Windows CUDA installation without Anaconda + cuda_home = os.environ.get('CUDA_PATH') + if cuda_home is not None: + return os.path.join(cuda_home, *subdirs) + + +def _get_nvvm_path(): + by, path = _get_nvvm_path_decision() + candidates = find_lib('nvvm', path) + path = max(candidates) if candidates else None + return _env_path_tuple(by, path) + + +def get_cuda_paths(): + """Returns a dictionary mapping component names to a 2-tuple + of (source_variable, info). + + The returned dictionary will have the following keys and infos: + - "nvvm": file_path + - "libdevice": List[Tuple[arch, file_path]] + - "cudalib_dir": directory_path + + Note: The result of the function is cached. + """ + # Check cache + if hasattr(get_cuda_paths, '_cached_result'): + return get_cuda_paths._cached_result + else: + # Not in cache + d = { + 'nvvm': _get_nvvm_path(), + 'libdevice': _get_libdevice_paths(), + 'cudalib_dir': _get_cudalib_dir(), + 'static_cudalib_dir': _get_static_cudalib_dir(), + 'include_dir': _get_include_dir(), + } + # Cache result + get_cuda_paths._cached_result = d + return d + + +def get_debian_pkg_libdevice(): + """ + Return the Debian NVIDIA Maintainers-packaged libdevice location, if it + exists. 
+ """ + pkg_libdevice_location = '/usr/lib/nvidia-cuda-toolkit/libdevice' + if not os.path.exists(pkg_libdevice_location): + return None + return pkg_libdevice_location + + +def get_current_cuda_target_name(): + """Determine conda's CTK target folder based on system and machine arch. + + CTK's conda package delivers headers based on its architecture type. For example, + `x86_64` machine places header under `$CONDA_PREFIX/targets/x86_64-linux`, and + `aarch64` places under `$CONDA_PREFIX/targets/sbsa-linux`. Read more about the + nuances at cudart's conda feedstock: + https://github.com/conda-forge/cuda-cudart-feedstock/blob/main/recipe/meta.yaml#L8-L11 # noqa: E501 + """ + system = platform.system() + machine = platform.machine() + + if system == "Linux": + arch_to_targets = { + 'x86_64': 'x86_64-linux', + 'aarch64': 'sbsa-linux' + } + elif system == "Windows": + arch_to_targets = { + 'AMD64': 'x64', + } + else: + arch_to_targets = {} + + return arch_to_targets.get(machine, None) + + +def get_conda_include_dir(): + """ + Return the include directory in the current conda environment, if one + is active and it exists. + """ + is_conda_env = os.path.exists(os.path.join(sys.prefix, 'conda-meta')) + if not is_conda_env: + return + + if platform.system() == "Windows": + include_dir = os.path.join( + sys.prefix, 'Library', 'include' + ) + elif target_name := get_current_cuda_target_name(): + include_dir = os.path.join( + sys.prefix, 'targets', target_name, 'include' + ) + else: + # A fallback when target cannot determined + # though usually it shouldn't. 
+ include_dir = os.path.join(sys.prefix, 'include') + + if (os.path.exists(include_dir) and os.path.isdir(include_dir) + and os.path.exists(os.path.join(include_dir, + 'cuda_device_runtime_api.h'))): + return include_dir + return + + +def _get_include_dir(): + """Find the root include directory.""" + options = [ + ('Conda environment (NVIDIA package)', get_conda_include_dir()), + ('CUDA_INCLUDE_PATH Config Entry', config.CUDA_INCLUDE_PATH), + # TODO: add others + ] + by, include_dir = _find_valid_path(options) + return _env_path_tuple(by, include_dir) diff --git a/cuda_bindings/cuda/bindings/ecosystem/findlib.py b/cuda_bindings/cuda/bindings/ecosystem/findlib.py new file mode 100644 index 000000000..3b48ab570 --- /dev/null +++ b/cuda_bindings/cuda/bindings/ecosystem/findlib.py @@ -0,0 +1,63 @@ +import sys +import os +import re + + +def get_lib_dirs(): + """ + Anaconda specific + """ + if sys.platform == 'win32': + # on windows, historically `DLLs` has been used for CUDA libraries, + # since approximately CUDA 9.2, `Library\bin` has been used. + dirnames = ['DLLs', os.path.join('Library', 'bin')] + else: + dirnames = ['lib', ] + libdirs = [os.path.join(sys.prefix, x) for x in dirnames] + return libdirs + + +DLLNAMEMAP = { + 'linux': r'lib%(name)s\.so\.%(ver)s$', + 'linux2': r'lib%(name)s\.so\.%(ver)s$', + 'linux-static': r'lib%(name)s\.a$', + 'darwin': r'lib%(name)s\.%(ver)s\.dylib$', + 'win32': r'%(name)s%(ver)s\.dll$', + 'win32-static': r'%(name)s\.lib$', + 'bsd': r'lib%(name)s\.so\.%(ver)s$', +} + +RE_VER = r'[0-9]*([_\.][0-9]+)*' + + +def find_lib(libname, libdir=None, platform=None, static=False): + platform = platform or sys.platform + platform = 'bsd' if 'bsd' in platform else platform + if static: + platform = f"{platform}-static" + if platform not in DLLNAMEMAP: + # Return empty list if platform name is undefined. + # Not all platforms define their static library paths. 
+ return [] + pat = DLLNAMEMAP[platform] % {"name": libname, "ver": RE_VER} + regex = re.compile(pat) + return find_file(regex, libdir) + + +def find_file(pat, libdir=None): + if libdir is None: + libdirs = get_lib_dirs() + elif isinstance(libdir, str): + libdirs = [libdir,] + else: + libdirs = list(libdir) + files = [] + for ldir in libdirs: + try: + entries = os.listdir(ldir) + except FileNotFoundError: + continue + candidates = [os.path.join(ldir, ent) + for ent in entries if pat.match(ent)] + files.extend([c for c in candidates if os.path.isfile(c)]) + return files From a0b8d3c4217853dbfc1e25ee8d8aff9c105c1d4d Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Wed, 12 Feb 2025 12:33:42 -0800 Subject: [PATCH 02/69] Add Forked from URLs. --- cuda_bindings/cuda/bindings/ecosystem/cuda_paths.py | 3 +++ cuda_bindings/cuda/bindings/ecosystem/findlib.py | 3 +++ 2 files changed, 6 insertions(+) diff --git a/cuda_bindings/cuda/bindings/ecosystem/cuda_paths.py b/cuda_bindings/cuda/bindings/ecosystem/cuda_paths.py index 4290a0a95..312af892c 100644 --- a/cuda_bindings/cuda/bindings/ecosystem/cuda_paths.py +++ b/cuda_bindings/cuda/bindings/ecosystem/cuda_paths.py @@ -1,3 +1,6 @@ +# Forked from: +# https://github.com/NVIDIA/numba-cuda/blob/bf487d78a40eea87f009d636882a5000a7524c95/numba_cuda/numba/cuda/cuda_paths.py + import sys import re import os diff --git a/cuda_bindings/cuda/bindings/ecosystem/findlib.py b/cuda_bindings/cuda/bindings/ecosystem/findlib.py index 3b48ab570..e11d2b573 100644 --- a/cuda_bindings/cuda/bindings/ecosystem/findlib.py +++ b/cuda_bindings/cuda/bindings/ecosystem/findlib.py @@ -1,3 +1,6 @@ +# Forked from: +# https://github.com/numba/numba/blob/f0d24824fcd6a454827e3c108882395d00befc04/numba/misc/findlib.py + import sys import os import re From 8439abc5efe5e3d71e3545338e13630f3a3ed4de Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Wed, 12 Feb 2025 13:59:14 -0800 Subject: [PATCH 03/69] Strip down cuda_paths.py to minimum required for `_get_nvvm_path()` Tested interactively with: ``` import cuda_paths nvvm_path = cuda_paths._get_nvvm_path() print(f"{nvvm_path=}") ``` --- .../cuda/bindings/ecosystem/cuda_paths.py | 211 +----------------- 1 file changed, 2 insertions(+), 209 deletions(-) diff --git a/cuda_bindings/cuda/bindings/ecosystem/cuda_paths.py b/cuda_bindings/cuda/bindings/ecosystem/cuda_paths.py index 312af892c..3a5fabb59 100644 --- a/cuda_bindings/cuda/bindings/ecosystem/cuda_paths.py +++ b/cuda_bindings/cuda/bindings/ecosystem/cuda_paths.py @@ -7,10 +7,9 @@ from collections import namedtuple import platform -from numba.core.config import IS_WIN32 -from numba.misc.findlib import find_lib, find_file -from numba import config +from findlib import find_file, find_lib +IS_WIN32 = sys.platform.startswith('win32') _env_path_tuple = namedtuple('_env_path_tuple', ['by', 'info']) @@ -27,18 +26,6 @@ def _find_valid_path(options): return '', None -def _get_libdevice_path_decision(): - options = [ - ('Conda environment', get_conda_ctk()), - ('Conda environment (NVIDIA package)', get_nvidia_libdevice_ctk()), - ('CUDA_HOME', get_cuda_home('nvvm', 'libdevice')), - ('System', get_system_ctk('nvvm', 'libdevice')), - ('Debian package', get_debian_pkg_libdevice()), - ] - by, libdir = _find_valid_path(options) - return by, libdir - - def _nvvm_lib_dir(): if IS_WIN32: return 'nvvm', 'bin' @@ -57,16 +44,6 @@ def _get_nvvm_path_decision(): return by, path -def _get_libdevice_paths(): - by, libdir = _get_libdevice_path_decision() - # Search for pattern - pat = r'libdevice(\.\d+)*\.bc$' - candidates = find_file(re.compile(pat), libdir) - # Keep only the max (most recent version) of the bitcode files. 
- out = max(candidates, default=None) - return _env_path_tuple(by, out) - - def _cudalib_path(): if IS_WIN32: return 'bin' @@ -74,45 +51,6 @@ def _cudalib_path(): return 'lib64' -def _cuda_home_static_cudalib_path(): - if IS_WIN32: - return ('lib', 'x64') - else: - return ('lib64',) - - -def _get_cudalib_dir_path_decision(): - options = [ - ('Conda environment', get_conda_ctk()), - ('Conda environment (NVIDIA package)', get_nvidia_cudalib_ctk()), - ('CUDA_HOME', get_cuda_home(_cudalib_path())), - ('System', get_system_ctk(_cudalib_path())), - ] - by, libdir = _find_valid_path(options) - return by, libdir - - -def _get_static_cudalib_dir_path_decision(): - options = [ - ('Conda environment', get_conda_ctk()), - ('Conda environment (NVIDIA package)', get_nvidia_static_cudalib_ctk()), - ('CUDA_HOME', get_cuda_home(*_cuda_home_static_cudalib_path())), - ('System', get_system_ctk(_cudalib_path())), - ] - by, libdir = _find_valid_path(options) - return by, libdir - - -def _get_cudalib_dir(): - by, libdir = _get_cudalib_dir_path_decision() - return _env_path_tuple(by, libdir) - - -def _get_static_cudalib_dir(): - by, libdir = _get_static_cudalib_dir_path_decision() - return _env_path_tuple(by, libdir) - - def get_system_ctk(*subdirs): """Return path to system-wide cudatoolkit; or, None if it doesn't exist. """ @@ -166,45 +104,6 @@ def get_nvidia_nvvm_ctk(): return os.path.dirname(max(paths)) -def get_nvidia_libdevice_ctk(): - """Return path to directory containing the libdevice library. - """ - nvvm_ctk = get_nvidia_nvvm_ctk() - if not nvvm_ctk: - return - nvvm_dir = os.path.dirname(nvvm_ctk) - return os.path.join(nvvm_dir, 'libdevice') - - -def get_nvidia_cudalib_ctk(): - """Return path to directory containing the shared libraries of cudatoolkit. 
- """ - nvvm_ctk = get_nvidia_nvvm_ctk() - if not nvvm_ctk: - return - env_dir = os.path.dirname(os.path.dirname(nvvm_ctk)) - subdir = 'bin' if IS_WIN32 else 'lib' - return os.path.join(env_dir, subdir) - - -def get_nvidia_static_cudalib_ctk(): - """Return path to directory containing the static libraries of cudatoolkit. - """ - nvvm_ctk = get_nvidia_nvvm_ctk() - if not nvvm_ctk: - return - - if IS_WIN32 and ("Library" not in nvvm_ctk): - # Location specific to CUDA 11.x packages on Windows - dirs = ('Lib', 'x64') - else: - # Linux, or Windows with CUDA 12.x packages - dirs = ('lib',) - - env_dir = os.path.dirname(os.path.dirname(nvvm_ctk)) - return os.path.join(env_dir, *dirs) - - def get_cuda_home(*subdirs): """Get paths of CUDA_HOME. If *subdirs* are the subdirectory name to be appended in the resulting @@ -223,109 +122,3 @@ def _get_nvvm_path(): candidates = find_lib('nvvm', path) path = max(candidates) if candidates else None return _env_path_tuple(by, path) - - -def get_cuda_paths(): - """Returns a dictionary mapping component names to a 2-tuple - of (source_variable, info). - - The returned dictionary will have the following keys and infos: - - "nvvm": file_path - - "libdevice": List[Tuple[arch, file_path]] - - "cudalib_dir": directory_path - - Note: The result of the function is cached. - """ - # Check cache - if hasattr(get_cuda_paths, '_cached_result'): - return get_cuda_paths._cached_result - else: - # Not in cache - d = { - 'nvvm': _get_nvvm_path(), - 'libdevice': _get_libdevice_paths(), - 'cudalib_dir': _get_cudalib_dir(), - 'static_cudalib_dir': _get_static_cudalib_dir(), - 'include_dir': _get_include_dir(), - } - # Cache result - get_cuda_paths._cached_result = d - return d - - -def get_debian_pkg_libdevice(): - """ - Return the Debian NVIDIA Maintainers-packaged libdevice location, if it - exists. 
- """ - pkg_libdevice_location = '/usr/lib/nvidia-cuda-toolkit/libdevice' - if not os.path.exists(pkg_libdevice_location): - return None - return pkg_libdevice_location - - -def get_current_cuda_target_name(): - """Determine conda's CTK target folder based on system and machine arch. - - CTK's conda package delivers headers based on its architecture type. For example, - `x86_64` machine places header under `$CONDA_PREFIX/targets/x86_64-linux`, and - `aarch64` places under `$CONDA_PREFIX/targets/sbsa-linux`. Read more about the - nuances at cudart's conda feedstock: - https://github.com/conda-forge/cuda-cudart-feedstock/blob/main/recipe/meta.yaml#L8-L11 # noqa: E501 - """ - system = platform.system() - machine = platform.machine() - - if system == "Linux": - arch_to_targets = { - 'x86_64': 'x86_64-linux', - 'aarch64': 'sbsa-linux' - } - elif system == "Windows": - arch_to_targets = { - 'AMD64': 'x64', - } - else: - arch_to_targets = {} - - return arch_to_targets.get(machine, None) - - -def get_conda_include_dir(): - """ - Return the include directory in the current conda environment, if one - is active and it exists. - """ - is_conda_env = os.path.exists(os.path.join(sys.prefix, 'conda-meta')) - if not is_conda_env: - return - - if platform.system() == "Windows": - include_dir = os.path.join( - sys.prefix, 'Library', 'include' - ) - elif target_name := get_current_cuda_target_name(): - include_dir = os.path.join( - sys.prefix, 'targets', target_name, 'include' - ) - else: - # A fallback when target cannot determined - # though usually it shouldn't. 
- include_dir = os.path.join(sys.prefix, 'include') - - if (os.path.exists(include_dir) and os.path.isdir(include_dir) - and os.path.exists(os.path.join(include_dir, - 'cuda_device_runtime_api.h'))): - return include_dir - return - - -def _get_include_dir(): - """Find the root include directory.""" - options = [ - ('Conda environment (NVIDIA package)', get_conda_include_dir()), - ('CUDA_INCLUDE_PATH Config Entry', config.CUDA_INCLUDE_PATH), - # TODO: add others - ] - by, include_dir = _find_valid_path(options) - return _env_path_tuple(by, include_dir) From 586d2092f35708a16ad5801e57cf0f5b0050730e Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Wed, 12 Feb 2025 14:02:06 -0800 Subject: [PATCH 04/69] ruff auto-fixes (NO manual changes) --- .../cuda/bindings/ecosystem/cuda_paths.py | 59 +++++++++---------- .../cuda/bindings/ecosystem/findlib.py | 35 ++++++----- 2 files changed, 46 insertions(+), 48 deletions(-) diff --git a/cuda_bindings/cuda/bindings/ecosystem/cuda_paths.py b/cuda_bindings/cuda/bindings/ecosystem/cuda_paths.py index 3a5fabb59..4b01b9f78 100644 --- a/cuda_bindings/cuda/bindings/ecosystem/cuda_paths.py +++ b/cuda_bindings/cuda/bindings/ecosystem/cuda_paths.py @@ -1,17 +1,15 @@ # Forked from: # https://github.com/NVIDIA/numba-cuda/blob/bf487d78a40eea87f009d636882a5000a7524c95/numba_cuda/numba/cuda/cuda_paths.py -import sys -import re import os +import sys from collections import namedtuple -import platform -from findlib import find_file, find_lib +from findlib import find_lib -IS_WIN32 = sys.platform.startswith('win32') +IS_WIN32 = sys.platform.startswith("win32") -_env_path_tuple = namedtuple('_env_path_tuple', ['by', 'info']) +_env_path_tuple = namedtuple("_env_path_tuple", ["by", "info"]) def _find_valid_path(options): @@ -23,22 +21,22 @@ def _find_valid_path(options): if data is not None: return by, data else: - return '', None + return "", None def _nvvm_lib_dir(): if IS_WIN32: - return 'nvvm', 'bin' + return "nvvm", "bin" else: - 
return 'nvvm', 'lib64' + return "nvvm", "lib64" def _get_nvvm_path_decision(): options = [ - ('Conda environment', get_conda_ctk()), - ('Conda environment (NVIDIA package)', get_nvidia_nvvm_ctk()), - ('CUDA_HOME', get_cuda_home(*_nvvm_lib_dir())), - ('System', get_system_ctk(*_nvvm_lib_dir())), + ("Conda environment", get_conda_ctk()), + ("Conda environment (NVIDIA package)", get_nvidia_nvvm_ctk()), + ("CUDA_HOME", get_cuda_home(*_nvvm_lib_dir())), + ("System", get_system_ctk(*_nvvm_lib_dir())), ] by, path = _find_valid_path(options) return by, path @@ -46,31 +44,29 @@ def _get_nvvm_path_decision(): def _cudalib_path(): if IS_WIN32: - return 'bin' + return "bin" else: - return 'lib64' + return "lib64" def get_system_ctk(*subdirs): - """Return path to system-wide cudatoolkit; or, None if it doesn't exist. - """ + """Return path to system-wide cudatoolkit; or, None if it doesn't exist.""" # Linux? - if sys.platform.startswith('linux'): + if sys.platform.startswith("linux"): # Is cuda alias to /usr/local/cuda? # We are intentionally not getting versioned cuda installation. - base = '/usr/local/cuda' + base = "/usr/local/cuda" if os.path.exists(base): return os.path.join(base, *subdirs) def get_conda_ctk(): - """Return path to directory containing the shared libraries of cudatoolkit. - """ - is_conda_env = os.path.exists(os.path.join(sys.prefix, 'conda-meta')) + """Return path to directory containing the shared libraries of cudatoolkit.""" + is_conda_env = os.path.exists(os.path.join(sys.prefix, "conda-meta")) if not is_conda_env: return # Assume the existence of NVVM to imply cudatoolkit installed - paths = find_lib('nvvm') + paths = find_lib("nvvm") if not paths: return # Use the directory name of the max path @@ -78,9 +74,8 @@ def get_conda_ctk(): def get_nvidia_nvvm_ctk(): - """Return path to directory containing the NVVM shared library. 
- """ - is_conda_env = os.path.exists(os.path.join(sys.prefix, 'conda-meta')) + """Return path to directory containing the NVVM shared library.""" + is_conda_env = os.path.exists(os.path.join(sys.prefix, "conda-meta")) if not is_conda_env: return @@ -88,16 +83,16 @@ def get_nvidia_nvvm_ctk(): # conda package is installed. # First, try the location used on Linux and the Windows 11.x packages - libdir = os.path.join(sys.prefix, 'nvvm', _cudalib_path()) + libdir = os.path.join(sys.prefix, "nvvm", _cudalib_path()) if not os.path.exists(libdir) or not os.path.isdir(libdir): # If that fails, try the location used for Windows 12.x packages - libdir = os.path.join(sys.prefix, 'Library', 'nvvm', _cudalib_path()) + libdir = os.path.join(sys.prefix, "Library", "nvvm", _cudalib_path()) if not os.path.exists(libdir) or not os.path.isdir(libdir): # If that doesn't exist either, assume we don't have the NVIDIA # conda package return - paths = find_lib('nvvm', libdir=libdir) + paths = find_lib("nvvm", libdir=libdir) if not paths: return # Use the directory name of the max path @@ -109,16 +104,16 @@ def get_cuda_home(*subdirs): If *subdirs* are the subdirectory name to be appended in the resulting path. 
""" - cuda_home = os.environ.get('CUDA_HOME') + cuda_home = os.environ.get("CUDA_HOME") if cuda_home is None: # Try Windows CUDA installation without Anaconda - cuda_home = os.environ.get('CUDA_PATH') + cuda_home = os.environ.get("CUDA_PATH") if cuda_home is not None: return os.path.join(cuda_home, *subdirs) def _get_nvvm_path(): by, path = _get_nvvm_path_decision() - candidates = find_lib('nvvm', path) + candidates = find_lib("nvvm", path) path = max(candidates) if candidates else None return _env_path_tuple(by, path) diff --git a/cuda_bindings/cuda/bindings/ecosystem/findlib.py b/cuda_bindings/cuda/bindings/ecosystem/findlib.py index e11d2b573..4de57c905 100644 --- a/cuda_bindings/cuda/bindings/ecosystem/findlib.py +++ b/cuda_bindings/cuda/bindings/ecosystem/findlib.py @@ -1,41 +1,43 @@ # Forked from: # https://github.com/numba/numba/blob/f0d24824fcd6a454827e3c108882395d00befc04/numba/misc/findlib.py -import sys import os import re +import sys def get_lib_dirs(): """ Anaconda specific """ - if sys.platform == 'win32': + if sys.platform == "win32": # on windows, historically `DLLs` has been used for CUDA libraries, # since approximately CUDA 9.2, `Library\bin` has been used. 
- dirnames = ['DLLs', os.path.join('Library', 'bin')] + dirnames = ["DLLs", os.path.join("Library", "bin")] else: - dirnames = ['lib', ] + dirnames = [ + "lib", + ] libdirs = [os.path.join(sys.prefix, x) for x in dirnames] return libdirs DLLNAMEMAP = { - 'linux': r'lib%(name)s\.so\.%(ver)s$', - 'linux2': r'lib%(name)s\.so\.%(ver)s$', - 'linux-static': r'lib%(name)s\.a$', - 'darwin': r'lib%(name)s\.%(ver)s\.dylib$', - 'win32': r'%(name)s%(ver)s\.dll$', - 'win32-static': r'%(name)s\.lib$', - 'bsd': r'lib%(name)s\.so\.%(ver)s$', + "linux": r"lib%(name)s\.so\.%(ver)s$", + "linux2": r"lib%(name)s\.so\.%(ver)s$", + "linux-static": r"lib%(name)s\.a$", + "darwin": r"lib%(name)s\.%(ver)s\.dylib$", + "win32": r"%(name)s%(ver)s\.dll$", + "win32-static": r"%(name)s\.lib$", + "bsd": r"lib%(name)s\.so\.%(ver)s$", } -RE_VER = r'[0-9]*([_\.][0-9]+)*' +RE_VER = r"[0-9]*([_\.][0-9]+)*" def find_lib(libname, libdir=None, platform=None, static=False): platform = platform or sys.platform - platform = 'bsd' if 'bsd' in platform else platform + platform = "bsd" if "bsd" in platform else platform if static: platform = f"{platform}-static" if platform not in DLLNAMEMAP: @@ -51,7 +53,9 @@ def find_file(pat, libdir=None): if libdir is None: libdirs = get_lib_dirs() elif isinstance(libdir, str): - libdirs = [libdir,] + libdirs = [ + libdir, + ] else: libdirs = list(libdir) files = [] @@ -60,7 +64,6 @@ def find_file(pat, libdir=None): entries = os.listdir(ldir) except FileNotFoundError: continue - candidates = [os.path.join(ldir, ent) - for ent in entries if pat.match(ent)] + candidates = [os.path.join(ldir, ent) for ent in entries if pat.match(ent)] files.extend([c for c in candidates if os.path.isfile(c)]) return files From 91b6b99ea6db00914afff18d026dcd3ba4f2e847 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Wed, 12 Feb 2025 14:04:47 -0800 Subject: [PATCH 05/69] Make `get_nvvm_path()` a public API (i.e. remove leading underscore). 
--- cuda_bindings/cuda/bindings/ecosystem/cuda_paths.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_bindings/cuda/bindings/ecosystem/cuda_paths.py b/cuda_bindings/cuda/bindings/ecosystem/cuda_paths.py index 4b01b9f78..291adf4fb 100644 --- a/cuda_bindings/cuda/bindings/ecosystem/cuda_paths.py +++ b/cuda_bindings/cuda/bindings/ecosystem/cuda_paths.py @@ -112,7 +112,7 @@ def get_cuda_home(*subdirs): return os.path.join(cuda_home, *subdirs) -def _get_nvvm_path(): +def get_nvvm_path(): by, path = _get_nvvm_path_decision() candidates = find_lib("nvvm", path) path = max(candidates) if candidates else None From d31920ca07db52b2bd63810a017b19bf683a8ce1 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Wed, 19 Mar 2025 11:53:42 -0700 Subject: [PATCH 06/69] Fetch numba-cuda/numba_cuda/numba/cuda/cuda_paths.py from https://github.com/NVIDIA/numba-cuda/pull/155 AS-IS --- .../cuda/bindings/ecosystem/cuda_paths.py | 333 ++++++++++++++++-- 1 file changed, 300 insertions(+), 33 deletions(-) diff --git a/cuda_bindings/cuda/bindings/ecosystem/cuda_paths.py b/cuda_bindings/cuda/bindings/ecosystem/cuda_paths.py index 291adf4fb..a5d37dfbd 100644 --- a/cuda_bindings/cuda/bindings/ecosystem/cuda_paths.py +++ b/cuda_bindings/cuda/bindings/ecosystem/cuda_paths.py @@ -1,15 +1,16 @@ -# Forked from: -# https://github.com/NVIDIA/numba-cuda/blob/bf487d78a40eea87f009d636882a5000a7524c95/numba_cuda/numba/cuda/cuda_paths.py - -import os import sys +import re +import os from collections import namedtuple +import platform +import site +from pathlib import Path +from numba.core.config import IS_WIN32 +from numba.misc.findlib import find_lib, find_file +from numba import config -from findlib import find_lib -IS_WIN32 = sys.platform.startswith("win32") - -_env_path_tuple = namedtuple("_env_path_tuple", ["by", "info"]) +_env_path_tuple = namedtuple('_env_path_tuple', ['by', 'info']) def _find_valid_path(options): @@ -21,52 +22,158 @@ def _find_valid_path(options): 
if data is not None: return by, data else: - return "", None + return '', None + + +def _get_libdevice_path_decision(): + options = [ + ('Conda environment', get_conda_ctk()), + ('Conda environment (NVIDIA package)', get_nvidia_libdevice_ctk()), + ('CUDA_HOME', get_cuda_home('nvvm', 'libdevice')), + ('Debian package', get_debian_pkg_libdevice()), + ('NVIDIA NVCC Wheel', get_libdevice_wheel()), + ] + libdevice_ctk_dir = get_system_ctk('nvvm', 'libdevice') + if os.path.exists(libdevice_ctk_dir): + options.append(('System', libdevice_ctk_dir)) + + by, libdir = _find_valid_path(options) + return by, libdir def _nvvm_lib_dir(): if IS_WIN32: - return "nvvm", "bin" + return 'nvvm', 'bin' else: - return "nvvm", "lib64" + return 'nvvm', 'lib64' def _get_nvvm_path_decision(): options = [ - ("Conda environment", get_conda_ctk()), - ("Conda environment (NVIDIA package)", get_nvidia_nvvm_ctk()), - ("CUDA_HOME", get_cuda_home(*_nvvm_lib_dir())), - ("System", get_system_ctk(*_nvvm_lib_dir())), + ('Conda environment', get_conda_ctk()), + ('Conda environment (NVIDIA package)', get_nvidia_nvvm_ctk()), + ('CUDA_HOME', get_cuda_home(*_nvvm_lib_dir())), + ('NVIDIA NVCC Wheel', _get_nvvm_wheel()), ] + # need to ensure nvvm dir actually exists + nvvm_ctk_dir = get_system_ctk(*_nvvm_lib_dir()) + if os.path.exists(nvvm_ctk_dir): + options.append(('System', nvvm_ctk_dir)) + by, path = _find_valid_path(options) return by, path +def _get_nvvm_wheel(): + site_paths = [ + site.getusersitepackages() + ] + site.getsitepackages() + ["conda", None] + for sp in site_paths: + # The SONAME is taken based on public CTK 12.x releases + if sys.platform.startswith("linux"): + dso_dir = "lib64" + # Hack: libnvvm from Linux wheel + # does not have any soname (CUDAINST-3183) + dso_path = "libnvvm.so" + elif sys.platform.startswith("win32"): + dso_dir = "bin" + dso_path = "nvvm64_40_0.dll" + else: + raise AssertionError() + + if sp is not None: + dso_dir = os.path.join( + sp, + "nvidia", + "cuda_nvcc", + 
"nvvm", + dso_dir + ) + dso_path = os.path.join(dso_dir, dso_path) + if os.path.exists(dso_path): + return str(Path(dso_path).parent) + + +def _get_libdevice_paths(): + by, libdir = _get_libdevice_path_decision() + if by == "NVIDIA NVCC Wheel": + # The NVVM path is a directory, not a file + out = os.path.join(libdir, "libdevice.10.bc") + else: + # Search for pattern + pat = r'libdevice(\.\d+)*\.bc$' + candidates = find_file(re.compile(pat), libdir) + # Keep only the max (most recent version) of the bitcode files. + out = max(candidates, default=None) + return _env_path_tuple(by, out) + + def _cudalib_path(): if IS_WIN32: - return "bin" + return 'bin' else: - return "lib64" + return 'lib64' + + +def _cuda_home_static_cudalib_path(): + if IS_WIN32: + return ('lib', 'x64') + else: + return ('lib64',) + + +def _get_cudalib_dir_path_decision(): + options = [ + ('Conda environment', get_conda_ctk()), + ('Conda environment (NVIDIA package)', get_nvidia_cudalib_ctk()), + ('CUDA_HOME', get_cuda_home(_cudalib_path())), + ('System', get_system_ctk(_cudalib_path())), + ] + by, libdir = _find_valid_path(options) + return by, libdir + + +def _get_static_cudalib_dir_path_decision(): + options = [ + ('Conda environment', get_conda_ctk()), + ('Conda environment (NVIDIA package)', get_nvidia_static_cudalib_ctk()), + ('CUDA_HOME', get_cuda_home(*_cuda_home_static_cudalib_path())), + ('System', get_system_ctk(_cudalib_path())), + ] + by, libdir = _find_valid_path(options) + return by, libdir + + +def _get_cudalib_dir(): + by, libdir = _get_cudalib_dir_path_decision() + return _env_path_tuple(by, libdir) + + +def _get_static_cudalib_dir(): + by, libdir = _get_static_cudalib_dir_path_decision() + return _env_path_tuple(by, libdir) def get_system_ctk(*subdirs): - """Return path to system-wide cudatoolkit; or, None if it doesn't exist.""" + """Return path to system-wide cudatoolkit; or, None if it doesn't exist. + """ # Linux? 
- if sys.platform.startswith("linux"): + if sys.platform.startswith('linux'): # Is cuda alias to /usr/local/cuda? # We are intentionally not getting versioned cuda installation. - base = "/usr/local/cuda" + base = '/usr/local/cuda' if os.path.exists(base): return os.path.join(base, *subdirs) def get_conda_ctk(): - """Return path to directory containing the shared libraries of cudatoolkit.""" - is_conda_env = os.path.exists(os.path.join(sys.prefix, "conda-meta")) + """Return path to directory containing the shared libraries of cudatoolkit. + """ + is_conda_env = os.path.exists(os.path.join(sys.prefix, 'conda-meta')) if not is_conda_env: return # Assume the existence of NVVM to imply cudatoolkit installed - paths = find_lib("nvvm") + paths = find_lib('nvvm') if not paths: return # Use the directory name of the max path @@ -74,8 +181,9 @@ def get_conda_ctk(): def get_nvidia_nvvm_ctk(): - """Return path to directory containing the NVVM shared library.""" - is_conda_env = os.path.exists(os.path.join(sys.prefix, "conda-meta")) + """Return path to directory containing the NVVM shared library. + """ + is_conda_env = os.path.exists(os.path.join(sys.prefix, 'conda-meta')) if not is_conda_env: return @@ -83,37 +191,196 @@ def get_nvidia_nvvm_ctk(): # conda package is installed. 
# First, try the location used on Linux and the Windows 11.x packages - libdir = os.path.join(sys.prefix, "nvvm", _cudalib_path()) + libdir = os.path.join(sys.prefix, 'nvvm', _cudalib_path()) if not os.path.exists(libdir) or not os.path.isdir(libdir): # If that fails, try the location used for Windows 12.x packages - libdir = os.path.join(sys.prefix, "Library", "nvvm", _cudalib_path()) + libdir = os.path.join(sys.prefix, 'Library', 'nvvm', _cudalib_path()) if not os.path.exists(libdir) or not os.path.isdir(libdir): # If that doesn't exist either, assume we don't have the NVIDIA # conda package return - paths = find_lib("nvvm", libdir=libdir) + paths = find_lib('nvvm', libdir=libdir) if not paths: return # Use the directory name of the max path return os.path.dirname(max(paths)) +def get_nvidia_libdevice_ctk(): + """Return path to directory containing the libdevice library. + """ + nvvm_ctk = get_nvidia_nvvm_ctk() + if not nvvm_ctk: + return + nvvm_dir = os.path.dirname(nvvm_ctk) + return os.path.join(nvvm_dir, 'libdevice') + + +def get_nvidia_cudalib_ctk(): + """Return path to directory containing the shared libraries of cudatoolkit. + """ + nvvm_ctk = get_nvidia_nvvm_ctk() + if not nvvm_ctk: + return + env_dir = os.path.dirname(os.path.dirname(nvvm_ctk)) + subdir = 'bin' if IS_WIN32 else 'lib' + return os.path.join(env_dir, subdir) + + +def get_nvidia_static_cudalib_ctk(): + """Return path to directory containing the static libraries of cudatoolkit. + """ + nvvm_ctk = get_nvidia_nvvm_ctk() + if not nvvm_ctk: + return + + if IS_WIN32 and ("Library" not in nvvm_ctk): + # Location specific to CUDA 11.x packages on Windows + dirs = ('Lib', 'x64') + else: + # Linux, or Windows with CUDA 12.x packages + dirs = ('lib',) + + env_dir = os.path.dirname(os.path.dirname(nvvm_ctk)) + return os.path.join(env_dir, *dirs) + + def get_cuda_home(*subdirs): """Get paths of CUDA_HOME. If *subdirs* are the subdirectory name to be appended in the resulting path. 
""" - cuda_home = os.environ.get("CUDA_HOME") + cuda_home = os.environ.get('CUDA_HOME') if cuda_home is None: # Try Windows CUDA installation without Anaconda - cuda_home = os.environ.get("CUDA_PATH") + cuda_home = os.environ.get('CUDA_PATH') if cuda_home is not None: return os.path.join(cuda_home, *subdirs) -def get_nvvm_path(): +def _get_nvvm_path(): by, path = _get_nvvm_path_decision() - candidates = find_lib("nvvm", path) - path = max(candidates) if candidates else None + if by == "NVIDIA NVCC Wheel": + # The NVVM path is a directory, not a file + path = os.path.join(path, "libnvvm.so") + else: + candidates = find_lib('nvvm', path) + path = max(candidates) if candidates else None return _env_path_tuple(by, path) + + +def get_cuda_paths(): + """Returns a dictionary mapping component names to a 2-tuple + of (source_variable, info). + + The returned dictionary will have the following keys and infos: + - "nvvm": file_path + - "libdevice": List[Tuple[arch, file_path]] + - "cudalib_dir": directory_path + + Note: The result of the function is cached. + """ + # Check cache + if hasattr(get_cuda_paths, '_cached_result'): + return get_cuda_paths._cached_result + else: + # Not in cache + d = { + 'nvvm': _get_nvvm_path(), + 'libdevice': _get_libdevice_paths(), + 'cudalib_dir': _get_cudalib_dir(), + 'static_cudalib_dir': _get_static_cudalib_dir(), + 'include_dir': _get_include_dir(), + } + # Cache result + get_cuda_paths._cached_result = d + return d + + +def get_debian_pkg_libdevice(): + """ + Return the Debian NVIDIA Maintainers-packaged libdevice location, if it + exists. 
+ """ + pkg_libdevice_location = '/usr/lib/nvidia-cuda-toolkit/libdevice' + if not os.path.exists(pkg_libdevice_location): + return None + return pkg_libdevice_location + + +def get_libdevice_wheel(): + nvvm_path = _get_nvvm_wheel() + if nvvm_path is None: + return None + nvvm_path = Path(nvvm_path) + libdevice_path = nvvm_path.parent / "libdevice" + + return str(libdevice_path) + + +def get_current_cuda_target_name(): + """Determine conda's CTK target folder based on system and machine arch. + + CTK's conda package delivers headers based on its architecture type. For example, + `x86_64` machine places header under `$CONDA_PREFIX/targets/x86_64-linux`, and + `aarch64` places under `$CONDA_PREFIX/targets/sbsa-linux`. Read more about the + nuances at cudart's conda feedstock: + https://github.com/conda-forge/cuda-cudart-feedstock/blob/main/recipe/meta.yaml#L8-L11 # noqa: E501 + """ + system = platform.system() + machine = platform.machine() + + if system == "Linux": + arch_to_targets = { + 'x86_64': 'x86_64-linux', + 'aarch64': 'sbsa-linux' + } + elif system == "Windows": + arch_to_targets = { + 'AMD64': 'x64', + } + else: + arch_to_targets = {} + + return arch_to_targets.get(machine, None) + + +def get_conda_include_dir(): + """ + Return the include directory in the current conda environment, if one + is active and it exists. + """ + is_conda_env = os.path.exists(os.path.join(sys.prefix, 'conda-meta')) + if not is_conda_env: + return + + if platform.system() == "Windows": + include_dir = os.path.join( + sys.prefix, 'Library', 'include' + ) + elif target_name := get_current_cuda_target_name(): + include_dir = os.path.join( + sys.prefix, 'targets', target_name, 'include' + ) + else: + # A fallback when target cannot determined + # though usually it shouldn't. 
+ include_dir = os.path.join(sys.prefix, 'include') + + if (os.path.exists(include_dir) and os.path.isdir(include_dir) + and os.path.exists(os.path.join(include_dir, + 'cuda_device_runtime_api.h'))): + return include_dir + return + + +def _get_include_dir(): + """Find the root include directory.""" + options = [ + ('Conda environment (NVIDIA package)', get_conda_include_dir()), + ('CUDA_INCLUDE_PATH Config Entry', config.CUDA_INCLUDE_PATH), + # TODO: add others + ] + by, include_dir = _find_valid_path(options) + return _env_path_tuple(by, include_dir) From ed0ebb3117f4b3622b3b2d1ae7c80e90e3592800 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Wed, 19 Mar 2025 11:56:17 -0700 Subject: [PATCH 07/69] ruff format NO MANUAL CHANGES --- .../cuda/bindings/ecosystem/cuda_paths.py | 175 ++++++++---------- 1 file changed, 78 insertions(+), 97 deletions(-) diff --git a/cuda_bindings/cuda/bindings/ecosystem/cuda_paths.py b/cuda_bindings/cuda/bindings/ecosystem/cuda_paths.py index a5d37dfbd..d1f4a85f0 100644 --- a/cuda_bindings/cuda/bindings/ecosystem/cuda_paths.py +++ b/cuda_bindings/cuda/bindings/ecosystem/cuda_paths.py @@ -1,16 +1,16 @@ -import sys -import re import os -from collections import namedtuple import platform +import re import site +import sys +from collections import namedtuple from pathlib import Path -from numba.core.config import IS_WIN32 -from numba.misc.findlib import find_lib, find_file -from numba import config +from numba import config +from numba.core.config import IS_WIN32 +from numba.misc.findlib import find_file, find_lib -_env_path_tuple = namedtuple('_env_path_tuple', ['by', 'info']) +_env_path_tuple = namedtuple("_env_path_tuple", ["by", "info"]) def _find_valid_path(options): @@ -22,20 +22,20 @@ def _find_valid_path(options): if data is not None: return by, data else: - return '', None + return "", None def _get_libdevice_path_decision(): options = [ - ('Conda environment', get_conda_ctk()), - ('Conda environment (NVIDIA package)', 
get_nvidia_libdevice_ctk()), - ('CUDA_HOME', get_cuda_home('nvvm', 'libdevice')), - ('Debian package', get_debian_pkg_libdevice()), - ('NVIDIA NVCC Wheel', get_libdevice_wheel()), + ("Conda environment", get_conda_ctk()), + ("Conda environment (NVIDIA package)", get_nvidia_libdevice_ctk()), + ("CUDA_HOME", get_cuda_home("nvvm", "libdevice")), + ("Debian package", get_debian_pkg_libdevice()), + ("NVIDIA NVCC Wheel", get_libdevice_wheel()), ] - libdevice_ctk_dir = get_system_ctk('nvvm', 'libdevice') + libdevice_ctk_dir = get_system_ctk("nvvm", "libdevice") if os.path.exists(libdevice_ctk_dir): - options.append(('System', libdevice_ctk_dir)) + options.append(("System", libdevice_ctk_dir)) by, libdir = _find_valid_path(options) return by, libdir @@ -43,31 +43,29 @@ def _get_libdevice_path_decision(): def _nvvm_lib_dir(): if IS_WIN32: - return 'nvvm', 'bin' + return "nvvm", "bin" else: - return 'nvvm', 'lib64' + return "nvvm", "lib64" def _get_nvvm_path_decision(): options = [ - ('Conda environment', get_conda_ctk()), - ('Conda environment (NVIDIA package)', get_nvidia_nvvm_ctk()), - ('CUDA_HOME', get_cuda_home(*_nvvm_lib_dir())), - ('NVIDIA NVCC Wheel', _get_nvvm_wheel()), + ("Conda environment", get_conda_ctk()), + ("Conda environment (NVIDIA package)", get_nvidia_nvvm_ctk()), + ("CUDA_HOME", get_cuda_home(*_nvvm_lib_dir())), + ("NVIDIA NVCC Wheel", _get_nvvm_wheel()), ] # need to ensure nvvm dir actually exists nvvm_ctk_dir = get_system_ctk(*_nvvm_lib_dir()) if os.path.exists(nvvm_ctk_dir): - options.append(('System', nvvm_ctk_dir)) + options.append(("System", nvvm_ctk_dir)) by, path = _find_valid_path(options) return by, path def _get_nvvm_wheel(): - site_paths = [ - site.getusersitepackages() - ] + site.getsitepackages() + ["conda", None] + site_paths = [site.getusersitepackages()] + site.getsitepackages() + ["conda", None] for sp in site_paths: # The SONAME is taken based on public CTK 12.x releases if sys.platform.startswith("linux"): @@ -82,13 +80,7 @@ def 
_get_nvvm_wheel(): raise AssertionError() if sp is not None: - dso_dir = os.path.join( - sp, - "nvidia", - "cuda_nvcc", - "nvvm", - dso_dir - ) + dso_dir = os.path.join(sp, "nvidia", "cuda_nvcc", "nvvm", dso_dir) dso_path = os.path.join(dso_dir, dso_path) if os.path.exists(dso_path): return str(Path(dso_path).parent) @@ -101,7 +93,7 @@ def _get_libdevice_paths(): out = os.path.join(libdir, "libdevice.10.bc") else: # Search for pattern - pat = r'libdevice(\.\d+)*\.bc$' + pat = r"libdevice(\.\d+)*\.bc$" candidates = find_file(re.compile(pat), libdir) # Keep only the max (most recent version) of the bitcode files. out = max(candidates, default=None) @@ -110,24 +102,24 @@ def _get_libdevice_paths(): def _cudalib_path(): if IS_WIN32: - return 'bin' + return "bin" else: - return 'lib64' + return "lib64" def _cuda_home_static_cudalib_path(): if IS_WIN32: - return ('lib', 'x64') + return ("lib", "x64") else: - return ('lib64',) + return ("lib64",) def _get_cudalib_dir_path_decision(): options = [ - ('Conda environment', get_conda_ctk()), - ('Conda environment (NVIDIA package)', get_nvidia_cudalib_ctk()), - ('CUDA_HOME', get_cuda_home(_cudalib_path())), - ('System', get_system_ctk(_cudalib_path())), + ("Conda environment", get_conda_ctk()), + ("Conda environment (NVIDIA package)", get_nvidia_cudalib_ctk()), + ("CUDA_HOME", get_cuda_home(_cudalib_path())), + ("System", get_system_ctk(_cudalib_path())), ] by, libdir = _find_valid_path(options) return by, libdir @@ -135,10 +127,10 @@ def _get_cudalib_dir_path_decision(): def _get_static_cudalib_dir_path_decision(): options = [ - ('Conda environment', get_conda_ctk()), - ('Conda environment (NVIDIA package)', get_nvidia_static_cudalib_ctk()), - ('CUDA_HOME', get_cuda_home(*_cuda_home_static_cudalib_path())), - ('System', get_system_ctk(_cudalib_path())), + ("Conda environment", get_conda_ctk()), + ("Conda environment (NVIDIA package)", get_nvidia_static_cudalib_ctk()), + ("CUDA_HOME", 
get_cuda_home(*_cuda_home_static_cudalib_path())), + ("System", get_system_ctk(_cudalib_path())), ] by, libdir = _find_valid_path(options) return by, libdir @@ -155,25 +147,23 @@ def _get_static_cudalib_dir(): def get_system_ctk(*subdirs): - """Return path to system-wide cudatoolkit; or, None if it doesn't exist. - """ + """Return path to system-wide cudatoolkit; or, None if it doesn't exist.""" # Linux? - if sys.platform.startswith('linux'): + if sys.platform.startswith("linux"): # Is cuda alias to /usr/local/cuda? # We are intentionally not getting versioned cuda installation. - base = '/usr/local/cuda' + base = "/usr/local/cuda" if os.path.exists(base): return os.path.join(base, *subdirs) def get_conda_ctk(): - """Return path to directory containing the shared libraries of cudatoolkit. - """ - is_conda_env = os.path.exists(os.path.join(sys.prefix, 'conda-meta')) + """Return path to directory containing the shared libraries of cudatoolkit.""" + is_conda_env = os.path.exists(os.path.join(sys.prefix, "conda-meta")) if not is_conda_env: return # Assume the existence of NVVM to imply cudatoolkit installed - paths = find_lib('nvvm') + paths = find_lib("nvvm") if not paths: return # Use the directory name of the max path @@ -181,9 +171,8 @@ def get_conda_ctk(): def get_nvidia_nvvm_ctk(): - """Return path to directory containing the NVVM shared library. - """ - is_conda_env = os.path.exists(os.path.join(sys.prefix, 'conda-meta')) + """Return path to directory containing the NVVM shared library.""" + is_conda_env = os.path.exists(os.path.join(sys.prefix, "conda-meta")) if not is_conda_env: return @@ -191,16 +180,16 @@ def get_nvidia_nvvm_ctk(): # conda package is installed. 
# First, try the location used on Linux and the Windows 11.x packages - libdir = os.path.join(sys.prefix, 'nvvm', _cudalib_path()) + libdir = os.path.join(sys.prefix, "nvvm", _cudalib_path()) if not os.path.exists(libdir) or not os.path.isdir(libdir): # If that fails, try the location used for Windows 12.x packages - libdir = os.path.join(sys.prefix, 'Library', 'nvvm', _cudalib_path()) + libdir = os.path.join(sys.prefix, "Library", "nvvm", _cudalib_path()) if not os.path.exists(libdir) or not os.path.isdir(libdir): # If that doesn't exist either, assume we don't have the NVIDIA # conda package return - paths = find_lib('nvvm', libdir=libdir) + paths = find_lib("nvvm", libdir=libdir) if not paths: return # Use the directory name of the max path @@ -208,39 +197,36 @@ def get_nvidia_nvvm_ctk(): def get_nvidia_libdevice_ctk(): - """Return path to directory containing the libdevice library. - """ + """Return path to directory containing the libdevice library.""" nvvm_ctk = get_nvidia_nvvm_ctk() if not nvvm_ctk: return nvvm_dir = os.path.dirname(nvvm_ctk) - return os.path.join(nvvm_dir, 'libdevice') + return os.path.join(nvvm_dir, "libdevice") def get_nvidia_cudalib_ctk(): - """Return path to directory containing the shared libraries of cudatoolkit. - """ + """Return path to directory containing the shared libraries of cudatoolkit.""" nvvm_ctk = get_nvidia_nvvm_ctk() if not nvvm_ctk: return env_dir = os.path.dirname(os.path.dirname(nvvm_ctk)) - subdir = 'bin' if IS_WIN32 else 'lib' + subdir = "bin" if IS_WIN32 else "lib" return os.path.join(env_dir, subdir) def get_nvidia_static_cudalib_ctk(): - """Return path to directory containing the static libraries of cudatoolkit. 
- """ + """Return path to directory containing the static libraries of cudatoolkit.""" nvvm_ctk = get_nvidia_nvvm_ctk() if not nvvm_ctk: return if IS_WIN32 and ("Library" not in nvvm_ctk): # Location specific to CUDA 11.x packages on Windows - dirs = ('Lib', 'x64') + dirs = ("Lib", "x64") else: # Linux, or Windows with CUDA 12.x packages - dirs = ('lib',) + dirs = ("lib",) env_dir = os.path.dirname(os.path.dirname(nvvm_ctk)) return os.path.join(env_dir, *dirs) @@ -251,10 +237,10 @@ def get_cuda_home(*subdirs): If *subdirs* are the subdirectory name to be appended in the resulting path. """ - cuda_home = os.environ.get('CUDA_HOME') + cuda_home = os.environ.get("CUDA_HOME") if cuda_home is None: # Try Windows CUDA installation without Anaconda - cuda_home = os.environ.get('CUDA_PATH') + cuda_home = os.environ.get("CUDA_PATH") if cuda_home is not None: return os.path.join(cuda_home, *subdirs) @@ -265,7 +251,7 @@ def _get_nvvm_path(): # The NVVM path is a directory, not a file path = os.path.join(path, "libnvvm.so") else: - candidates = find_lib('nvvm', path) + candidates = find_lib("nvvm", path) path = max(candidates) if candidates else None return _env_path_tuple(by, path) @@ -282,16 +268,16 @@ def get_cuda_paths(): Note: The result of the function is cached. 
""" # Check cache - if hasattr(get_cuda_paths, '_cached_result'): + if hasattr(get_cuda_paths, "_cached_result"): return get_cuda_paths._cached_result else: # Not in cache d = { - 'nvvm': _get_nvvm_path(), - 'libdevice': _get_libdevice_paths(), - 'cudalib_dir': _get_cudalib_dir(), - 'static_cudalib_dir': _get_static_cudalib_dir(), - 'include_dir': _get_include_dir(), + "nvvm": _get_nvvm_path(), + "libdevice": _get_libdevice_paths(), + "cudalib_dir": _get_cudalib_dir(), + "static_cudalib_dir": _get_static_cudalib_dir(), + "include_dir": _get_include_dir(), } # Cache result get_cuda_paths._cached_result = d @@ -303,7 +289,7 @@ def get_debian_pkg_libdevice(): Return the Debian NVIDIA Maintainers-packaged libdevice location, if it exists. """ - pkg_libdevice_location = '/usr/lib/nvidia-cuda-toolkit/libdevice' + pkg_libdevice_location = "/usr/lib/nvidia-cuda-toolkit/libdevice" if not os.path.exists(pkg_libdevice_location): return None return pkg_libdevice_location @@ -332,13 +318,10 @@ def get_current_cuda_target_name(): machine = platform.machine() if system == "Linux": - arch_to_targets = { - 'x86_64': 'x86_64-linux', - 'aarch64': 'sbsa-linux' - } + arch_to_targets = {"x86_64": "x86_64-linux", "aarch64": "sbsa-linux"} elif system == "Windows": arch_to_targets = { - 'AMD64': 'x64', + "AMD64": "x64", } else: arch_to_targets = {} @@ -351,26 +334,24 @@ def get_conda_include_dir(): Return the include directory in the current conda environment, if one is active and it exists. 
""" - is_conda_env = os.path.exists(os.path.join(sys.prefix, 'conda-meta')) + is_conda_env = os.path.exists(os.path.join(sys.prefix, "conda-meta")) if not is_conda_env: return if platform.system() == "Windows": - include_dir = os.path.join( - sys.prefix, 'Library', 'include' - ) + include_dir = os.path.join(sys.prefix, "Library", "include") elif target_name := get_current_cuda_target_name(): - include_dir = os.path.join( - sys.prefix, 'targets', target_name, 'include' - ) + include_dir = os.path.join(sys.prefix, "targets", target_name, "include") else: # A fallback when target cannot determined # though usually it shouldn't. - include_dir = os.path.join(sys.prefix, 'include') + include_dir = os.path.join(sys.prefix, "include") - if (os.path.exists(include_dir) and os.path.isdir(include_dir) - and os.path.exists(os.path.join(include_dir, - 'cuda_device_runtime_api.h'))): + if ( + os.path.exists(include_dir) + and os.path.isdir(include_dir) + and os.path.exists(os.path.join(include_dir, "cuda_device_runtime_api.h")) + ): return include_dir return @@ -378,8 +359,8 @@ def get_conda_include_dir(): def _get_include_dir(): """Find the root include directory.""" options = [ - ('Conda environment (NVIDIA package)', get_conda_include_dir()), - ('CUDA_INCLUDE_PATH Config Entry', config.CUDA_INCLUDE_PATH), + ("Conda environment (NVIDIA package)", get_conda_include_dir()), + ("CUDA_INCLUDE_PATH Config Entry", config.CUDA_INCLUDE_PATH), # TODO: add others ] by, include_dir = _find_valid_path(options) From 0c5aca5da90a8d33382317c620bae2ba1cae5f7e Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Wed, 19 Mar 2025 11:59:50 -0700 Subject: [PATCH 08/69] Minimal changes to adapt numba-cuda/numba_cuda/numba/cuda/cuda_paths.py from https://github.com/NVIDIA/numba-cuda/pull/155 --- .../cuda/bindings/ecosystem/cuda_paths.py | 46 +++++++++++++++++-- .../tests/show_ecosystem_cuda_paths.py | 5 ++ 2 files changed, 46 insertions(+), 5 deletions(-) create mode 100644 cuda_bindings/tests/show_ecosystem_cuda_paths.py diff --git a/cuda_bindings/cuda/bindings/ecosystem/cuda_paths.py b/cuda_bindings/cuda/bindings/ecosystem/cuda_paths.py index d1f4a85f0..7a7c760c0 100644 --- a/cuda_bindings/cuda/bindings/ecosystem/cuda_paths.py +++ b/cuda_bindings/cuda/bindings/ecosystem/cuda_paths.py @@ -3,16 +3,52 @@ import re import site import sys +import traceback +import warnings from collections import namedtuple from pathlib import Path -from numba import config -from numba.core.config import IS_WIN32 -from numba.misc.findlib import find_file, find_lib +from .findlib import find_file, find_lib + +IS_WIN32 = sys.platform.startswith("win32") _env_path_tuple = namedtuple("_env_path_tuple", ["by", "info"]) +def _get_numba_CUDA_INCLUDE_PATH(): + # From numba/numba/core/config.py + + def _readenv(name, ctor, default): + value = os.environ.get(name) + if value is None: + return default() if callable(default) else default + try: + return ctor(value) + except Exception: + warnings.warn( # noqa: B028 + f"Environment variable '{name}' is defined but " + f"its associated value '{value}' could not be " + "parsed.\nThe parse failed with exception:\n" + f"{traceback.format_exc()}", + RuntimeWarning, + ) + return default + + if IS_WIN32: + cuda_path = os.environ.get("CUDA_PATH") + if cuda_path: # noqa: SIM108 + default_cuda_include_path = os.path.join(cuda_path, "include") + else: + default_cuda_include_path = "cuda_include_not_found" + else: + default_cuda_include_path = os.path.join(os.sep, "usr", "local", "cuda", "include") + CUDA_INCLUDE_PATH = 
_readenv("NUMBA_CUDA_INCLUDE_PATH", str, default_cuda_include_path) + return CUDA_INCLUDE_PATH + + +config_CUDA_INCLUDE_PATH = _get_numba_CUDA_INCLUDE_PATH() + + def _find_valid_path(options): """Find valid path from *options*, which is a list of 2-tuple of (name, path). Return first pair where *path* is not None. @@ -221,7 +257,7 @@ def get_nvidia_static_cudalib_ctk(): if not nvvm_ctk: return - if IS_WIN32 and ("Library" not in nvvm_ctk): + if IS_WIN32 and ("Library" not in nvvm_ctk): # noqa: SIM108 # Location specific to CUDA 11.x packages on Windows dirs = ("Lib", "x64") else: @@ -360,7 +396,7 @@ def _get_include_dir(): """Find the root include directory.""" options = [ ("Conda environment (NVIDIA package)", get_conda_include_dir()), - ("CUDA_INCLUDE_PATH Config Entry", config.CUDA_INCLUDE_PATH), + ("CUDA_INCLUDE_PATH Config Entry", config_CUDA_INCLUDE_PATH), # TODO: add others ] by, include_dir = _find_valid_path(options) diff --git a/cuda_bindings/tests/show_ecosystem_cuda_paths.py b/cuda_bindings/tests/show_ecosystem_cuda_paths.py new file mode 100644 index 000000000..aaddb4d92 --- /dev/null +++ b/cuda_bindings/tests/show_ecosystem_cuda_paths.py @@ -0,0 +1,5 @@ +from cuda.bindings.ecosystem import cuda_paths + +paths = cuda_paths.get_cuda_paths() +for k, v in cuda_paths.get_cuda_paths().items(): + print(f"{k}: {v}", flush=True) From ac8921dabbab737fffa12320627594b5d44c1fb3 Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Thu, 20 Mar 2025 15:56:53 -0700 Subject: [PATCH 09/69] Rename ecosystem/cuda_paths.py -> path_finder.py --- .../bindings/{ecosystem => _path_finder_utils}/findlib.py | 0 .../bindings/{ecosystem/cuda_paths.py => path_finder.py} | 2 +- cuda_bindings/tests/path_finder.py | 4 ++++ cuda_bindings/tests/show_ecosystem_cuda_paths.py | 5 ----- 4 files changed, 5 insertions(+), 6 deletions(-) rename cuda_bindings/cuda/bindings/{ecosystem => _path_finder_utils}/findlib.py (100%) rename cuda_bindings/cuda/bindings/{ecosystem/cuda_paths.py => path_finder.py} (99%) create mode 100644 cuda_bindings/tests/path_finder.py delete mode 100644 cuda_bindings/tests/show_ecosystem_cuda_paths.py diff --git a/cuda_bindings/cuda/bindings/ecosystem/findlib.py b/cuda_bindings/cuda/bindings/_path_finder_utils/findlib.py similarity index 100% rename from cuda_bindings/cuda/bindings/ecosystem/findlib.py rename to cuda_bindings/cuda/bindings/_path_finder_utils/findlib.py diff --git a/cuda_bindings/cuda/bindings/ecosystem/cuda_paths.py b/cuda_bindings/cuda/bindings/path_finder.py similarity index 99% rename from cuda_bindings/cuda/bindings/ecosystem/cuda_paths.py rename to cuda_bindings/cuda/bindings/path_finder.py index 7a7c760c0..176c14756 100644 --- a/cuda_bindings/cuda/bindings/ecosystem/cuda_paths.py +++ b/cuda_bindings/cuda/bindings/path_finder.py @@ -8,7 +8,7 @@ from collections import namedtuple from pathlib import Path -from .findlib import find_file, find_lib +from ._path_finder_utils.findlib import find_file, find_lib IS_WIN32 = sys.platform.startswith("win32") diff --git a/cuda_bindings/tests/path_finder.py b/cuda_bindings/tests/path_finder.py new file mode 100644 index 000000000..98027034c --- /dev/null +++ b/cuda_bindings/tests/path_finder.py @@ -0,0 +1,4 @@ +from cuda.bindings import path_finder + +for k, v in path_finder.get_cuda_paths().items(): + print(f"{k}: {v}", flush=True) diff --git a/cuda_bindings/tests/show_ecosystem_cuda_paths.py 
b/cuda_bindings/tests/show_ecosystem_cuda_paths.py deleted file mode 100644 index aaddb4d92..000000000 --- a/cuda_bindings/tests/show_ecosystem_cuda_paths.py +++ /dev/null @@ -1,5 +0,0 @@ -from cuda.bindings.ecosystem import cuda_paths - -paths = cuda_paths.get_cuda_paths() -for k, v in cuda_paths.get_cuda_paths().items(): - print(f"{k}: {v}", flush=True) From 1c217dba978a51a021a0c20d989f078d6e935edf Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Fri, 21 Mar 2025 14:40:23 -0700 Subject: [PATCH 10/69] Plug cuda.bindings.path_finder into cuda/bindings/_internal/nvvm_linux.pyx --- .../cuda/bindings/_internal/nvvm_linux.pyx | 26 ++++++++++--------- .../cuda/bindings/_internal/utils.pxd | 1 - .../cuda/bindings/_internal/utils.pyx | 8 ------ cuda_bindings/tests/path_finder.py | 4 ++- 4 files changed, 17 insertions(+), 22 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx index e21218772..0e22fc5f9 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx @@ -6,10 +6,10 @@ from libc.stdint cimport intptr_t -from .utils cimport get_nvvm_dso_version_suffix - from .utils import FunctionNotFoundError, NotSupportedError +from cuda.bindings import path_finder + ############################################################################### # Extern ############################################################################### @@ -51,16 +51,18 @@ cdef void* __nvvmGetProgramLog = NULL cdef void* load_library(const int driver_ver) except* with gil: - cdef void* handle - for suffix in get_nvvm_dso_version_suffix(driver_ver): - so_name = "libnvvm.so" + (f".{suffix}" if suffix else suffix) - handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL) - if handle != NULL: - break - else: - err_msg = dlerror() - raise RuntimeError(f'Failed to dlopen libnvvm ({err_msg.decode()})') - return handle + cdef void* handle = NULL; 
+ paths = path_finder.get_cuda_paths() + paths_nvvm = paths["nvvm"] + if paths_nvvm: + so_name = paths_nvvm.info + if so_name: + handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL) + if handle == NULL: + err_msg = dlerror() + raise RuntimeError(f'Failed to dlopen {so_name} ({err_msg.decode()})') + return handle + raise RuntimeError('Unable to locate libnvvm.so') cdef int _check_or_init_nvvm() except -1 nogil: diff --git a/cuda_bindings/cuda/bindings/_internal/utils.pxd b/cuda_bindings/cuda/bindings/_internal/utils.pxd index cac7846ff..cc06c4a77 100644 --- a/cuda_bindings/cuda/bindings/_internal/utils.pxd +++ b/cuda_bindings/cuda/bindings/_internal/utils.pxd @@ -167,4 +167,3 @@ cdef bint is_nested_sequence(data) cdef void* get_buffer_pointer(buf, Py_ssize_t size, readonly=*) except* cdef tuple get_nvjitlink_dso_version_suffix(int driver_ver) -cdef tuple get_nvvm_dso_version_suffix(int driver_ver) diff --git a/cuda_bindings/cuda/bindings/_internal/utils.pyx b/cuda_bindings/cuda/bindings/_internal/utils.pyx index 0a693c052..edee50d8f 100644 --- a/cuda_bindings/cuda/bindings/_internal/utils.pyx +++ b/cuda_bindings/cuda/bindings/_internal/utils.pyx @@ -133,11 +133,3 @@ cdef tuple get_nvjitlink_dso_version_suffix(int driver_ver): if 12000 <= driver_ver < 13000: return ('12', '') raise NotSupportedError(f'CUDA driver version {driver_ver} is not supported') - - -cdef tuple get_nvvm_dso_version_suffix(int driver_ver): - if 11000 <= driver_ver < 11020: - return ('3', '') - if 11020 <= driver_ver < 13000: - return ('4', '') - raise NotSupportedError(f'CUDA driver version {driver_ver} is not supported') diff --git a/cuda_bindings/tests/path_finder.py b/cuda_bindings/tests/path_finder.py index 98027034c..75abed477 100644 --- a/cuda_bindings/tests/path_finder.py +++ b/cuda_bindings/tests/path_finder.py @@ -1,4 +1,6 @@ from cuda.bindings import path_finder -for k, v in path_finder.get_cuda_paths().items(): +paths = path_finder.get_cuda_paths() + +for k, v in paths.items(): 
print(f"{k}: {v}", flush=True) From 3655d70c9a5c9d76e690bedc37b4f3ea539a0618 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Sat, 22 Mar 2025 09:14:00 -0700 Subject: [PATCH 11/69] Plug cuda.bindings.path_finder into cuda/bindings/_internal/nvjitlink_linux.pyx --- .../bindings/_internal/nvjitlink_linux.pyx | 25 +++++++++++-------- .../cuda/bindings/_internal/nvvm_linux.pyx | 11 ++++---- .../cuda/bindings/_internal/utils.pxd | 1 + .../cuda/bindings/_internal/utils.pyx | 8 ++++++ 4 files changed, 28 insertions(+), 17 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx index eb882b4fb..3c6a25267 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx @@ -4,12 +4,14 @@ # # This code was automatically generated across versions from 12.0.1 to 12.6.2. Do not modify it directly. -from libc.stdint cimport intptr_t +import os -from .utils cimport get_nvjitlink_dso_version_suffix +from libc.stdint cimport intptr_t from .utils import FunctionNotFoundError, NotSupportedError +from cuda.bindings import path_finder + ############################################################################### # Extern ############################################################################### @@ -53,16 +55,17 @@ cdef void* __nvJitLinkVersion = NULL cdef void* load_library(const int driver_ver) except* with gil: - cdef void* handle - for suffix in get_nvjitlink_dso_version_suffix(driver_ver): - so_name = "libnvJitLink.so" + (f".{suffix}" if suffix else suffix) + cdef void* handle = NULL; + paths = path_finder.get_cuda_paths() + paths_cudalib_dir = paths["cudalib_dir"] + if paths_cudalib_dir: + so_name = os.path.join(paths_cudalib_dir.info, "libnvJitLink.so") handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL) - if handle != NULL: - break - else: - err_msg = dlerror() - raise RuntimeError(f'Failed to 
dlopen libnvJitLink ({err_msg.decode()})') - return handle + if handle == NULL: + err_msg = dlerror() + raise RuntimeError(f'Failed to dlopen {so_name} ({err_msg.decode()})') + return handle + raise RuntimeError('Unable to locate libnvJitLink.so') cdef int _check_or_init_nvjitlink() except -1 nogil: diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx index 0e22fc5f9..3abf56e86 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx @@ -56,12 +56,11 @@ cdef void* load_library(const int driver_ver) except* with gil: paths_nvvm = paths["nvvm"] if paths_nvvm: so_name = paths_nvvm.info - if so_name: - handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL) - if handle == NULL: - err_msg = dlerror() - raise RuntimeError(f'Failed to dlopen {so_name} ({err_msg.decode()})') - return handle + handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL) + if handle == NULL: + err_msg = dlerror() + raise RuntimeError(f'Failed to dlopen {so_name} ({err_msg.decode()})') + return handle raise RuntimeError('Unable to locate libnvvm.so') diff --git a/cuda_bindings/cuda/bindings/_internal/utils.pxd b/cuda_bindings/cuda/bindings/_internal/utils.pxd index cc06c4a77..cac7846ff 100644 --- a/cuda_bindings/cuda/bindings/_internal/utils.pxd +++ b/cuda_bindings/cuda/bindings/_internal/utils.pxd @@ -167,3 +167,4 @@ cdef bint is_nested_sequence(data) cdef void* get_buffer_pointer(buf, Py_ssize_t size, readonly=*) except* cdef tuple get_nvjitlink_dso_version_suffix(int driver_ver) +cdef tuple get_nvvm_dso_version_suffix(int driver_ver) diff --git a/cuda_bindings/cuda/bindings/_internal/utils.pyx b/cuda_bindings/cuda/bindings/_internal/utils.pyx index edee50d8f..0a693c052 100644 --- a/cuda_bindings/cuda/bindings/_internal/utils.pyx +++ b/cuda_bindings/cuda/bindings/_internal/utils.pyx @@ -133,3 +133,11 @@ cdef tuple get_nvjitlink_dso_version_suffix(int 
driver_ver): if 12000 <= driver_ver < 13000: return ('12', '') raise NotSupportedError(f'CUDA driver version {driver_ver} is not supported') + + +cdef tuple get_nvvm_dso_version_suffix(int driver_ver): + if 11000 <= driver_ver < 11020: + return ('3', '') + if 11020 <= driver_ver < 13000: + return ('4', '') + raise NotSupportedError(f'CUDA driver version {driver_ver} is not supported') From 413313245270c1ceefa9036594ca0c7357c50002 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Sat, 22 Mar 2025 10:05:09 -0700 Subject: [PATCH 12/69] Fix `os.path.exists(None)` issue: ``` ______________________ ERROR collecting test_nvjitlink.py ______________________ tests/test_nvjitlink.py:62: in not check_nvjitlink_usable(), reason="nvJitLink not usable, maybe not installed or too old (<12.3)" tests/test_nvjitlink.py:58: in check_nvjitlink_usable return inner_nvjitlink._inspect_function_pointer("__nvJitLinkVersion") != 0 cuda/bindings/_internal/nvjitlink.pyx:257: in cuda.bindings._internal.nvjitlink._inspect_function_pointer ??? cuda/bindings/_internal/nvjitlink.pyx:260: in cuda.bindings._internal.nvjitlink._inspect_function_pointer ??? cuda/bindings/_internal/nvjitlink.pyx:208: in cuda.bindings._internal.nvjitlink._inspect_function_pointers ??? cuda/bindings/_internal/nvjitlink.pyx:102: in cuda.bindings._internal.nvjitlink._check_or_init_nvjitlink ??? cuda/bindings/_internal/nvjitlink.pyx:59: in cuda.bindings._internal.nvjitlink.load_library ??? /opt/hostedtoolcache/Python/3.13.2/x64/lib/python3.13/site-packages/cuda/bindings/path_finder.py:312: in get_cuda_paths "nvvm": _get_nvvm_path(), /opt/hostedtoolcache/Python/3.13.2/x64/lib/python3.13/site-packages/cuda/bindings/path_finder.py:285: in _get_nvvm_path by, path = _get_nvvm_path_decision() /opt/hostedtoolcache/Python/3.13.2/x64/lib/python3.13/site-packages/cuda/bindings/path_finder.py:96: in _get_nvvm_path_decision if os.path.exists(nvvm_ctk_dir): :19: in exists ??? 
E TypeError: stat: path should be string, bytes, os.PathLike or integer, not NoneType ``` --- cuda_bindings/cuda/bindings/path_finder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_bindings/cuda/bindings/path_finder.py b/cuda_bindings/cuda/bindings/path_finder.py index 176c14756..a52ae613a 100644 --- a/cuda_bindings/cuda/bindings/path_finder.py +++ b/cuda_bindings/cuda/bindings/path_finder.py @@ -93,7 +93,7 @@ def _get_nvvm_path_decision(): ] # need to ensure nvvm dir actually exists nvvm_ctk_dir = get_system_ctk(*_nvvm_lib_dir()) - if os.path.exists(nvvm_ctk_dir): + if nvvm_ctk_dir and os.path.exists(nvvm_ctk_dir): options.append(("System", nvvm_ctk_dir)) by, path = _find_valid_path(options) From 746cee04005ec32c4476091a0c5779dece5a21d2 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Sat, 22 Mar 2025 10:27:32 -0700 Subject: [PATCH 13/69] Fix another `os.path.exists(None)` issue: ``` ______________________ ERROR collecting test_nvjitlink.py ______________________ tests/test_nvjitlink.py:62: in not check_nvjitlink_usable(), reason="nvJitLink not usable, maybe not installed or too old (<12.3)" tests/test_nvjitlink.py:58: in check_nvjitlink_usable return inner_nvjitlink._inspect_function_pointer("__nvJitLinkVersion") != 0 cuda/bindings/_internal/nvjitlink.pyx:257: in cuda.bindings._internal.nvjitlink._inspect_function_pointer ??? cuda/bindings/_internal/nvjitlink.pyx:260: in cuda.bindings._internal.nvjitlink._inspect_function_pointer ??? cuda/bindings/_internal/nvjitlink.pyx:208: in cuda.bindings._internal.nvjitlink._inspect_function_pointers ??? cuda/bindings/_internal/nvjitlink.pyx:102: in cuda.bindings._internal.nvjitlink._check_or_init_nvjitlink ??? cuda/bindings/_internal/nvjitlink.pyx:59: in cuda.bindings._internal.nvjitlink.load_library ??? 
/opt/hostedtoolcache/Python/3.13.2/x64/lib/python3.13/site-packages/cuda/bindings/path_finder.py:313: in get_cuda_paths "libdevice": _get_libdevice_paths(), /opt/hostedtoolcache/Python/3.13.2/x64/lib/python3.13/site-packages/cuda/bindings/path_finder.py:126: in _get_libdevice_paths by, libdir = _get_libdevice_path_decision() /opt/hostedtoolcache/Python/3.13.2/x64/lib/python3.13/site-packages/cuda/bindings/path_finder.py:73: in _get_libdevice_path_decision if os.path.exists(libdevice_ctk_dir): :19: in exists ??? E TypeError: stat: path should be string, bytes, os.PathLike or integer, not NoneType ``` --- cuda_bindings/cuda/bindings/path_finder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_bindings/cuda/bindings/path_finder.py b/cuda_bindings/cuda/bindings/path_finder.py index a52ae613a..103bee56d 100644 --- a/cuda_bindings/cuda/bindings/path_finder.py +++ b/cuda_bindings/cuda/bindings/path_finder.py @@ -70,7 +70,7 @@ def _get_libdevice_path_decision(): ("NVIDIA NVCC Wheel", get_libdevice_wheel()), ] libdevice_ctk_dir = get_system_ctk("nvvm", "libdevice") - if os.path.exists(libdevice_ctk_dir): + if libdevice_ctk_dir and os.path.exists(libdevice_ctk_dir): options.append(("System", libdevice_ctk_dir)) by, libdir = _find_valid_path(options) From 08c3041ebfcc00c38c6be55f8413f7d7672c4d8f Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Mon, 24 Mar 2025 22:26:34 -0700 Subject: [PATCH 14/69] =?UTF-8?q?Change=20"/lib64/"=20=E2=86=92=20"/lib/"?= =?UTF-8?q?=20in=20nvjitlink=5Flinux.pyx?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx index 3c6a25267..ed9b19100 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx @@ -59,7 +59,10 @@ cdef void* load_library(const int driver_ver) except* with gil: paths = path_finder.get_cuda_paths() paths_cudalib_dir = paths["cudalib_dir"] if paths_cudalib_dir: + # TODO(rwgk): Produce the correct so_name in path_finder.py so_name = os.path.join(paths_cudalib_dir.info, "libnvJitLink.so") + if not os.path.exists(so_name) and so_name.count("/lib64/") == 1: + so_name = so_name.replace("/lib64/", "/lib/") handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL) if handle == NULL: err_msg = dlerror() From 02694c76d879bd4f29eb37efd951f6d4303d3fe9 Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Tue, 25 Mar 2025 09:18:11 -0700 Subject: [PATCH 15/69] nvjitlink_linux.pyx load_library() enhancements, mainly to avoid os.path.join(None, "libnvJitLink.so") --- .../bindings/_internal/nvjitlink_linux.pyx | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx index ed9b19100..f5f376d2b 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx @@ -55,20 +55,24 @@ cdef void* __nvJitLinkVersion = NULL cdef void* load_library(const int driver_ver) except* with gil: + so_basename = "libnvJitLink.so" cdef void* handle = NULL; paths = path_finder.get_cuda_paths() paths_cudalib_dir = paths["cudalib_dir"] - if paths_cudalib_dir: + if (paths_cudalib_dir and + paths_cudalib_dir.info and + os.path.isdir(paths_cudalib_dir.info)): # TODO(rwgk): Produce the correct so_name in path_finder.py - so_name = os.path.join(paths_cudalib_dir.info, "libnvJitLink.so") + so_name = os.path.join(paths_cudalib_dir.info, so_basename) if not os.path.exists(so_name) and so_name.count("/lib64/") == 1: so_name = so_name.replace("/lib64/", "/lib/") - handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL) - if handle == NULL: - err_msg = dlerror() - raise RuntimeError(f'Failed to dlopen {so_name} ({err_msg.decode()})') - return handle - raise RuntimeError('Unable to locate libnvJitLink.so') + if os.path.exists(so_name): + handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL) + if handle == NULL: + err_msg = dlerror() + raise RuntimeError(f'Failed to dlopen {so_name} ({err_msg.decode()})') + return handle + raise RuntimeError('Unable to locate {so_basename}') cdef int _check_or_init_nvjitlink() except -1 nogil: From 4e9972283be6750d7f69dba96cb0a03a33465a6e Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Tue, 25 Mar 2025 09:53:48 -0700 Subject: [PATCH 16/69] Add missing f-string f --- cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx index f5f376d2b..f24365ab6 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx @@ -72,7 +72,7 @@ cdef void* load_library(const int driver_ver) except* with gil: err_msg = dlerror() raise RuntimeError(f'Failed to dlopen {so_name} ({err_msg.decode()})') return handle - raise RuntimeError('Unable to locate {so_basename}') + raise RuntimeError(f'Unable to locate {so_basename}') cdef int _check_or_init_nvjitlink() except -1 nogil: From 76aa826df8a66a0656b28531f8989ea8739cf71d Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Tue, 25 Mar 2025 11:03:18 -0700 Subject: [PATCH 17/69] Add back get_nvjitlink_dso_version_suffix() call. --- cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx index f24365ab6..35cd3f6e5 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx @@ -8,6 +8,8 @@ import os from libc.stdint cimport intptr_t +from .utils cimport get_nvjitlink_dso_version_suffix + from .utils import FunctionNotFoundError, NotSupportedError from cuda.bindings import path_finder @@ -55,6 +57,10 @@ cdef void* __nvJitLinkVersion = NULL cdef void* load_library(const int driver_ver) except* with gil: + # TODO(rwgk): Move the version check here. 
+ # Intentionally ignoring returned value: + get_nvjitlink_dso_version_suffix(driver_ver) + so_basename = "libnvJitLink.so" cdef void* handle = NULL; paths = path_finder.get_cuda_paths() From f8b3dd5a536150682b97633322b07128f9bd7820 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Tue, 25 Mar 2025 13:00:57 -0700 Subject: [PATCH 18/69] pytest -ra -s -v --- .github/workflows/test-wheel-linux.yml | 8 ++++---- .github/workflows/test-wheel-windows.yml | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index 322f859e3..19c78c8cc 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -194,7 +194,7 @@ jobs: pushd ./cuda_bindings pip install -r requirements.txt - pytest -rxXs -v tests/ + pytest -ra -s -v tests/ # It is a bit convoluted to run the Cython tests against CTK wheels, # so let's just skip them. @@ -205,7 +205,7 @@ jobs: # TODO: enable this once win-64 runners are up exit 1 fi - pytest -rxXs -v tests/cython + pytest -ra -s -v tests/cython fi popd @@ -229,7 +229,7 @@ jobs: pushd ./cuda_core pip install -r "tests/requirements-cu${TEST_CUDA_MAJOR}.txt" - pytest -rxXs -v tests/ + pytest -ra -s -v tests/ # It is a bit convoluted to run the Cython tests against CTK wheels, # so let's just skip them. 
Also, currently our CI always installs the @@ -243,7 +243,7 @@ jobs: # TODO: enable this once win-64 runners are up exit 1 fi - pytest -rxXs -v tests/cython + pytest -ra -s -v tests/cython fi popd diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 4e48590a3..233f56e4f 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -186,7 +186,7 @@ jobs: Push-Location ./cuda_bindings pip install -r requirements.txt - pytest -rxXs -v tests/ + pytest -ra -s -v tests/ # skip Cython tests for now (NVIDIA/cuda-python#466) Pop-Location @@ -210,7 +210,7 @@ jobs: Push-Location ./cuda_core pip install -r "tests/requirements-cu${TEST_CUDA_MAJOR}.txt" - pytest -rxXs -v tests/ + pytest -ra -s -v tests/ Pop-Location - name: Ensure cuda-python installable From 0ea73b1768b617d1e1a10ae029b8d943e1ae6aec Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Tue, 25 Mar 2025 14:17:52 -0700 Subject: [PATCH 19/69] Rewrite nvjitlink_linux.pyx load_library() to produce detailed error messages. 
--- .../bindings/_internal/nvjitlink_linux.pyx | 37 ++++++++++++------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx index 35cd3f6e5..fa8e5c401 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx @@ -65,20 +65,31 @@ cdef void* load_library(const int driver_ver) except* with gil: cdef void* handle = NULL; paths = path_finder.get_cuda_paths() paths_cudalib_dir = paths["cudalib_dir"] - if (paths_cudalib_dir and - paths_cudalib_dir.info and - os.path.isdir(paths_cudalib_dir.info)): - # TODO(rwgk): Produce the correct so_name in path_finder.py - so_name = os.path.join(paths_cudalib_dir.info, so_basename) - if not os.path.exists(so_name) and so_name.count("/lib64/") == 1: - so_name = so_name.replace("/lib64/", "/lib/") - if os.path.exists(so_name): + if not paths_cudalib_dir: + raise RuntimeError("Failure obtaining paths_cudalib_dir") + if not paths_cudalib_dir.info: + raise RuntimeError("Failure obtaining paths_cudalib_dir.info") + candidate_so_dirs = [paths_cudalib_dir.info] + libs = ["/lib/", "/lib64/"] + for _ in range(2): + alt_dir = libs[0].join(paths_cudalib_dir.info.rsplit(libs[1], 1)) + if alt_dir not in candidate_so_dirs: + candidate_so_dirs.append(alt_dir) + libs.reverse() + candidate_so_names = [ + os.path.join(so_dirname, so_basename) + for so_dirname in candidate_so_dirs] + error_messages = [] + for so_name in candidate_so_names: + if not os.path.exists(so_name): + error_messages.append(f"No such file: {so_name}") + else: handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL) - if handle == NULL: - err_msg = dlerror() - raise RuntimeError(f'Failed to dlopen {so_name} ({err_msg.decode()})') - return handle - raise RuntimeError(f'Unable to locate {so_basename}') + if handle != NULL: + return handle + err_msg = 
dlerror().decode(errors="backslashreplace") + error_messages.append(f"Failed to dlopen {so_name}: {err_msg}") + raise RuntimeError(f"Unable to load {so_basename}: {', '.join(error_messages)}") cdef int _check_or_init_nvjitlink() except -1 nogil: From 4e0ec81386c2a6f1da7a1ad11ee92c22b1daa6c5 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Tue, 25 Mar 2025 15:50:07 -0700 Subject: [PATCH 20/69] Attach listdir output to "Unable to load" exception message. --- cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx index fa8e5c401..e135c927e 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx @@ -89,7 +89,13 @@ cdef void* load_library(const int driver_ver) except* with gil: return handle err_msg = dlerror().decode(errors="backslashreplace") error_messages.append(f"Failed to dlopen {so_name}: {err_msg}") - raise RuntimeError(f"Unable to load {so_basename}: {', '.join(error_messages)}") + attachment = [] + for so_dirname in candidate_so_dirs: + attachment.append(f" listdir({repr(so_dirname)}):") + for node in sorted(os.listdir(so_dirname)): + attachment.append(f" {node}") + attachment = "\n".join(attachment) + raise RuntimeError(f"Unable to load {so_basename} from: {', '.join(error_messages)}\n{attachment}") cdef int _check_or_init_nvjitlink() except -1 nogil: From e421b4880c8286535ebd0e39a52a27509cb21436 Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Tue, 25 Mar 2025 19:50:21 -0700 Subject: [PATCH 21/69] Guard os.listdir() call with os.path.isdir() --- cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx index e135c927e..a03f0b3a4 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx @@ -92,8 +92,11 @@ cdef void* load_library(const int driver_ver) except* with gil: attachment = [] for so_dirname in candidate_so_dirs: attachment.append(f" listdir({repr(so_dirname)}):") - for node in sorted(os.listdir(so_dirname)): - attachment.append(f" {node}") + if not os.path.isdir(so_dirname): + attachment.append(" DIRECTORY DOES NOT EXIST") + else: + for node in sorted(os.listdir(so_dirname)): + attachment.append(f" {node}") attachment = "\n".join(attachment) raise RuntimeError(f"Unable to load {so_basename} from: {', '.join(error_messages)}\n{attachment}") From 1239be9816e84b6b95b9d023e23b6573495ac98f Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Tue, 25 Mar 2025 20:20:26 -0700 Subject: [PATCH 22/69] Fix logic error in nvjitlink_linux.pyx load_library() --- cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx index a03f0b3a4..6c31699c9 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx @@ -69,15 +69,16 @@ cdef void* load_library(const int driver_ver) except* with gil: raise RuntimeError("Failure obtaining paths_cudalib_dir") if not paths_cudalib_dir.info: raise RuntimeError("Failure obtaining paths_cudalib_dir.info") - candidate_so_dirs = [paths_cudalib_dir.info] + primary_so_dir = paths_cudalib_dir.info + "/" + candidate_so_dirs = [primary_so_dir] libs = ["/lib/", "/lib64/"] for _ in range(2): - alt_dir = libs[0].join(paths_cudalib_dir.info.rsplit(libs[1], 1)) + alt_dir = libs[0].join(primary_so_dir.rsplit(libs[1], 1)) if alt_dir not in candidate_so_dirs: candidate_so_dirs.append(alt_dir) libs.reverse() candidate_so_names = [ - os.path.join(so_dirname, so_basename) + so_dirname + so_basename for so_dirname in candidate_so_dirs] error_messages = [] for so_name in candidate_so_names: From e82c878deb58bc302effd508d9193acbfa96066c Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Wed, 26 Mar 2025 12:18:31 -0700 Subject: [PATCH 23/69] Move path_finder.py to _path_finder_utils/cuda_paths.py, import only public functions from new path_finder.py --- .../bindings/_path_finder_utils/cuda_paths.py | 403 ++++++++++++++++ cuda_bindings/cuda/bindings/path_finder.py | 436 ++---------------- 2 files changed, 436 insertions(+), 403 deletions(-) create mode 100644 cuda_bindings/cuda/bindings/_path_finder_utils/cuda_paths.py diff --git a/cuda_bindings/cuda/bindings/_path_finder_utils/cuda_paths.py b/cuda_bindings/cuda/bindings/_path_finder_utils/cuda_paths.py new file mode 100644 index 000000000..e27e6f54b --- /dev/null +++ b/cuda_bindings/cuda/bindings/_path_finder_utils/cuda_paths.py @@ -0,0 +1,403 @@ +import os +import platform +import re +import site +import sys +import traceback +import warnings +from collections import namedtuple +from pathlib import Path + +from .findlib import find_file, find_lib + +IS_WIN32 = sys.platform.startswith("win32") + +_env_path_tuple = namedtuple("_env_path_tuple", ["by", "info"]) + + +def _get_numba_CUDA_INCLUDE_PATH(): + # From numba/numba/core/config.py + + def _readenv(name, ctor, default): + value = os.environ.get(name) + if value is None: + return default() if callable(default) else default + try: + return ctor(value) + except Exception: + warnings.warn( # noqa: B028 + f"Environment variable '{name}' is defined but " + f"its associated value '{value}' could not be " + "parsed.\nThe parse failed with exception:\n" + f"{traceback.format_exc()}", + RuntimeWarning, + ) + return default + + if IS_WIN32: + cuda_path = os.environ.get("CUDA_PATH") + if cuda_path: # noqa: SIM108 + default_cuda_include_path = os.path.join(cuda_path, "include") + else: + default_cuda_include_path = "cuda_include_not_found" + else: + default_cuda_include_path = os.path.join(os.sep, "usr", "local", "cuda", "include") + CUDA_INCLUDE_PATH = _readenv("NUMBA_CUDA_INCLUDE_PATH", str, default_cuda_include_path) + return 
CUDA_INCLUDE_PATH + + +config_CUDA_INCLUDE_PATH = _get_numba_CUDA_INCLUDE_PATH() + + +def _find_valid_path(options): + """Find valid path from *options*, which is a list of 2-tuple of + (name, path). Return first pair where *path* is not None. + If no valid path is found, return ('', None) + """ + for by, data in options: + if data is not None: + return by, data + else: + return "", None + + +def _get_libdevice_path_decision(): + options = [ + ("Conda environment", get_conda_ctk()), + ("Conda environment (NVIDIA package)", get_nvidia_libdevice_ctk()), + ("CUDA_HOME", get_cuda_home("nvvm", "libdevice")), + ("Debian package", get_debian_pkg_libdevice()), + ("NVIDIA NVCC Wheel", get_libdevice_wheel()), + ] + libdevice_ctk_dir = get_system_ctk("nvvm", "libdevice") + if libdevice_ctk_dir and os.path.exists(libdevice_ctk_dir): + options.append(("System", libdevice_ctk_dir)) + + by, libdir = _find_valid_path(options) + return by, libdir + + +def _nvvm_lib_dir(): + if IS_WIN32: + return "nvvm", "bin" + else: + return "nvvm", "lib64" + + +def _get_nvvm_path_decision(): + options = [ + ("Conda environment", get_conda_ctk()), + ("Conda environment (NVIDIA package)", get_nvidia_nvvm_ctk()), + ("CUDA_HOME", get_cuda_home(*_nvvm_lib_dir())), + ("NVIDIA NVCC Wheel", _get_nvvm_wheel()), + ] + # need to ensure nvvm dir actually exists + nvvm_ctk_dir = get_system_ctk(*_nvvm_lib_dir()) + if nvvm_ctk_dir and os.path.exists(nvvm_ctk_dir): + options.append(("System", nvvm_ctk_dir)) + + by, path = _find_valid_path(options) + return by, path + + +def _get_nvvm_wheel(): + site_paths = [site.getusersitepackages()] + site.getsitepackages() + ["conda", None] + for sp in site_paths: + # The SONAME is taken based on public CTK 12.x releases + if sys.platform.startswith("linux"): + dso_dir = "lib64" + # Hack: libnvvm from Linux wheel + # does not have any soname (CUDAINST-3183) + dso_path = "libnvvm.so" + elif sys.platform.startswith("win32"): + dso_dir = "bin" + dso_path = "nvvm64_40_0.dll" + 
else: + raise AssertionError() + + if sp is not None: + dso_dir = os.path.join(sp, "nvidia", "cuda_nvcc", "nvvm", dso_dir) + dso_path = os.path.join(dso_dir, dso_path) + if os.path.exists(dso_path): + return str(Path(dso_path).parent) + + +def _get_libdevice_paths(): + by, libdir = _get_libdevice_path_decision() + if by == "NVIDIA NVCC Wheel": + # The NVVM path is a directory, not a file + out = os.path.join(libdir, "libdevice.10.bc") + else: + # Search for pattern + pat = r"libdevice(\.\d+)*\.bc$" + candidates = find_file(re.compile(pat), libdir) + # Keep only the max (most recent version) of the bitcode files. + out = max(candidates, default=None) + return _env_path_tuple(by, out) + + +def _cudalib_path(): + if IS_WIN32: + return "bin" + else: + return "lib64" + + +def _cuda_home_static_cudalib_path(): + if IS_WIN32: + return ("lib", "x64") + else: + return ("lib64",) + + +def _get_cudalib_dir_path_decision(): + options = [ + ("Conda environment", get_conda_ctk()), + ("Conda environment (NVIDIA package)", get_nvidia_cudalib_ctk()), + ("CUDA_HOME", get_cuda_home(_cudalib_path())), + ("System", get_system_ctk(_cudalib_path())), + ] + by, libdir = _find_valid_path(options) + return by, libdir + + +def _get_static_cudalib_dir_path_decision(): + options = [ + ("Conda environment", get_conda_ctk()), + ("Conda environment (NVIDIA package)", get_nvidia_static_cudalib_ctk()), + ("CUDA_HOME", get_cuda_home(*_cuda_home_static_cudalib_path())), + ("System", get_system_ctk(_cudalib_path())), + ] + by, libdir = _find_valid_path(options) + return by, libdir + + +def _get_cudalib_dir(): + by, libdir = _get_cudalib_dir_path_decision() + return _env_path_tuple(by, libdir) + + +def _get_static_cudalib_dir(): + by, libdir = _get_static_cudalib_dir_path_decision() + return _env_path_tuple(by, libdir) + + +def get_system_ctk(*subdirs): + """Return path to system-wide cudatoolkit; or, None if it doesn't exist.""" + # Linux? 
+ if sys.platform.startswith("linux"): + # Is cuda alias to /usr/local/cuda? + # We are intentionally not getting versioned cuda installation. + base = "/usr/local/cuda" + if os.path.exists(base): + return os.path.join(base, *subdirs) + + +def get_conda_ctk(): + """Return path to directory containing the shared libraries of cudatoolkit.""" + is_conda_env = os.path.exists(os.path.join(sys.prefix, "conda-meta")) + if not is_conda_env: + return + # Assume the existence of NVVM to imply cudatoolkit installed + paths = find_lib("nvvm") + if not paths: + return + # Use the directory name of the max path + return os.path.dirname(max(paths)) + + +def get_nvidia_nvvm_ctk(): + """Return path to directory containing the NVVM shared library.""" + is_conda_env = os.path.exists(os.path.join(sys.prefix, "conda-meta")) + if not is_conda_env: + return + + # Assume the existence of NVVM in the conda env implies that a CUDA toolkit + # conda package is installed. + + # First, try the location used on Linux and the Windows 11.x packages + libdir = os.path.join(sys.prefix, "nvvm", _cudalib_path()) + if not os.path.exists(libdir) or not os.path.isdir(libdir): + # If that fails, try the location used for Windows 12.x packages + libdir = os.path.join(sys.prefix, "Library", "nvvm", _cudalib_path()) + if not os.path.exists(libdir) or not os.path.isdir(libdir): + # If that doesn't exist either, assume we don't have the NVIDIA + # conda package + return + + paths = find_lib("nvvm", libdir=libdir) + if not paths: + return + # Use the directory name of the max path + return os.path.dirname(max(paths)) + + +def get_nvidia_libdevice_ctk(): + """Return path to directory containing the libdevice library.""" + nvvm_ctk = get_nvidia_nvvm_ctk() + if not nvvm_ctk: + return + nvvm_dir = os.path.dirname(nvvm_ctk) + return os.path.join(nvvm_dir, "libdevice") + + +def get_nvidia_cudalib_ctk(): + """Return path to directory containing the shared libraries of cudatoolkit.""" + nvvm_ctk = 
get_nvidia_nvvm_ctk() + if not nvvm_ctk: + return + env_dir = os.path.dirname(os.path.dirname(nvvm_ctk)) + subdir = "bin" if IS_WIN32 else "lib" + return os.path.join(env_dir, subdir) + + +def get_nvidia_static_cudalib_ctk(): + """Return path to directory containing the static libraries of cudatoolkit.""" + nvvm_ctk = get_nvidia_nvvm_ctk() + if not nvvm_ctk: + return + + if IS_WIN32 and ("Library" not in nvvm_ctk): # noqa: SIM108 + # Location specific to CUDA 11.x packages on Windows + dirs = ("Lib", "x64") + else: + # Linux, or Windows with CUDA 12.x packages + dirs = ("lib",) + + env_dir = os.path.dirname(os.path.dirname(nvvm_ctk)) + return os.path.join(env_dir, *dirs) + + +def get_cuda_home(*subdirs): + """Get paths of CUDA_HOME. + If *subdirs* are the subdirectory name to be appended in the resulting + path. + """ + cuda_home = os.environ.get("CUDA_HOME") + if cuda_home is None: + # Try Windows CUDA installation without Anaconda + cuda_home = os.environ.get("CUDA_PATH") + if cuda_home is not None: + return os.path.join(cuda_home, *subdirs) + + +def _get_nvvm_path(): + by, path = _get_nvvm_path_decision() + if by == "NVIDIA NVCC Wheel": + # The NVVM path is a directory, not a file + path = os.path.join(path, "libnvvm.so") + else: + candidates = find_lib("nvvm", path) + path = max(candidates) if candidates else None + return _env_path_tuple(by, path) + + +def get_cuda_paths(): + """Returns a dictionary mapping component names to a 2-tuple + of (source_variable, info). + + The returned dictionary will have the following keys and infos: + - "nvvm": file_path + - "libdevice": List[Tuple[arch, file_path]] + - "cudalib_dir": directory_path + + Note: The result of the function is cached. 
+ """ + # Check cache + if hasattr(get_cuda_paths, "_cached_result"): + return get_cuda_paths._cached_result + else: + # Not in cache + d = { + "nvvm": _get_nvvm_path(), + "libdevice": _get_libdevice_paths(), + "cudalib_dir": _get_cudalib_dir(), + "static_cudalib_dir": _get_static_cudalib_dir(), + "include_dir": _get_include_dir(), + } + # Cache result + get_cuda_paths._cached_result = d + return d + + +def get_debian_pkg_libdevice(): + """ + Return the Debian NVIDIA Maintainers-packaged libdevice location, if it + exists. + """ + pkg_libdevice_location = "/usr/lib/nvidia-cuda-toolkit/libdevice" + if not os.path.exists(pkg_libdevice_location): + return None + return pkg_libdevice_location + + +def get_libdevice_wheel(): + nvvm_path = _get_nvvm_wheel() + if nvvm_path is None: + return None + nvvm_path = Path(nvvm_path) + libdevice_path = nvvm_path.parent / "libdevice" + + return str(libdevice_path) + + +def get_current_cuda_target_name(): + """Determine conda's CTK target folder based on system and machine arch. + + CTK's conda package delivers headers based on its architecture type. For example, + `x86_64` machine places header under `$CONDA_PREFIX/targets/x86_64-linux`, and + `aarch64` places under `$CONDA_PREFIX/targets/sbsa-linux`. Read more about the + nuances at cudart's conda feedstock: + https://github.com/conda-forge/cuda-cudart-feedstock/blob/main/recipe/meta.yaml#L8-L11 # noqa: E501 + """ + system = platform.system() + machine = platform.machine() + + if system == "Linux": + arch_to_targets = {"x86_64": "x86_64-linux", "aarch64": "sbsa-linux"} + elif system == "Windows": + arch_to_targets = { + "AMD64": "x64", + } + else: + arch_to_targets = {} + + return arch_to_targets.get(machine, None) + + +def get_conda_include_dir(): + """ + Return the include directory in the current conda environment, if one + is active and it exists. 
+ """ + is_conda_env = os.path.exists(os.path.join(sys.prefix, "conda-meta")) + if not is_conda_env: + return + + if platform.system() == "Windows": + include_dir = os.path.join(sys.prefix, "Library", "include") + elif target_name := get_current_cuda_target_name(): + include_dir = os.path.join(sys.prefix, "targets", target_name, "include") + else: + # A fallback when target cannot determined + # though usually it shouldn't. + include_dir = os.path.join(sys.prefix, "include") + + if ( + os.path.exists(include_dir) + and os.path.isdir(include_dir) + and os.path.exists(os.path.join(include_dir, "cuda_device_runtime_api.h")) + ): + return include_dir + return + + +def _get_include_dir(): + """Find the root include directory.""" + options = [ + ("Conda environment (NVIDIA package)", get_conda_include_dir()), + ("CUDA_INCLUDE_PATH Config Entry", config_CUDA_INCLUDE_PATH), + # TODO: add others + ] + by, include_dir = _find_valid_path(options) + return _env_path_tuple(by, include_dir) diff --git a/cuda_bindings/cuda/bindings/path_finder.py b/cuda_bindings/cuda/bindings/path_finder.py index 103bee56d..5f1c46814 100644 --- a/cuda_bindings/cuda/bindings/path_finder.py +++ b/cuda_bindings/cuda/bindings/path_finder.py @@ -1,403 +1,33 @@ -import os -import platform -import re -import site -import sys -import traceback -import warnings -from collections import namedtuple -from pathlib import Path - -from ._path_finder_utils.findlib import find_file, find_lib - -IS_WIN32 = sys.platform.startswith("win32") - -_env_path_tuple = namedtuple("_env_path_tuple", ["by", "info"]) - - -def _get_numba_CUDA_INCLUDE_PATH(): - # From numba/numba/core/config.py - - def _readenv(name, ctor, default): - value = os.environ.get(name) - if value is None: - return default() if callable(default) else default - try: - return ctor(value) - except Exception: - warnings.warn( # noqa: B028 - f"Environment variable '{name}' is defined but " - f"its associated value '{value}' could not be " - "parsed.\nThe 
parse failed with exception:\n" - f"{traceback.format_exc()}", - RuntimeWarning, - ) - return default - - if IS_WIN32: - cuda_path = os.environ.get("CUDA_PATH") - if cuda_path: # noqa: SIM108 - default_cuda_include_path = os.path.join(cuda_path, "include") - else: - default_cuda_include_path = "cuda_include_not_found" - else: - default_cuda_include_path = os.path.join(os.sep, "usr", "local", "cuda", "include") - CUDA_INCLUDE_PATH = _readenv("NUMBA_CUDA_INCLUDE_PATH", str, default_cuda_include_path) - return CUDA_INCLUDE_PATH - - -config_CUDA_INCLUDE_PATH = _get_numba_CUDA_INCLUDE_PATH() - - -def _find_valid_path(options): - """Find valid path from *options*, which is a list of 2-tuple of - (name, path). Return first pair where *path* is not None. - If no valid path is found, return ('', None) - """ - for by, data in options: - if data is not None: - return by, data - else: - return "", None - - -def _get_libdevice_path_decision(): - options = [ - ("Conda environment", get_conda_ctk()), - ("Conda environment (NVIDIA package)", get_nvidia_libdevice_ctk()), - ("CUDA_HOME", get_cuda_home("nvvm", "libdevice")), - ("Debian package", get_debian_pkg_libdevice()), - ("NVIDIA NVCC Wheel", get_libdevice_wheel()), - ] - libdevice_ctk_dir = get_system_ctk("nvvm", "libdevice") - if libdevice_ctk_dir and os.path.exists(libdevice_ctk_dir): - options.append(("System", libdevice_ctk_dir)) - - by, libdir = _find_valid_path(options) - return by, libdir - - -def _nvvm_lib_dir(): - if IS_WIN32: - return "nvvm", "bin" - else: - return "nvvm", "lib64" - - -def _get_nvvm_path_decision(): - options = [ - ("Conda environment", get_conda_ctk()), - ("Conda environment (NVIDIA package)", get_nvidia_nvvm_ctk()), - ("CUDA_HOME", get_cuda_home(*_nvvm_lib_dir())), - ("NVIDIA NVCC Wheel", _get_nvvm_wheel()), - ] - # need to ensure nvvm dir actually exists - nvvm_ctk_dir = get_system_ctk(*_nvvm_lib_dir()) - if nvvm_ctk_dir and os.path.exists(nvvm_ctk_dir): - options.append(("System", nvvm_ctk_dir)) - 
- by, path = _find_valid_path(options) - return by, path - - -def _get_nvvm_wheel(): - site_paths = [site.getusersitepackages()] + site.getsitepackages() + ["conda", None] - for sp in site_paths: - # The SONAME is taken based on public CTK 12.x releases - if sys.platform.startswith("linux"): - dso_dir = "lib64" - # Hack: libnvvm from Linux wheel - # does not have any soname (CUDAINST-3183) - dso_path = "libnvvm.so" - elif sys.platform.startswith("win32"): - dso_dir = "bin" - dso_path = "nvvm64_40_0.dll" - else: - raise AssertionError() - - if sp is not None: - dso_dir = os.path.join(sp, "nvidia", "cuda_nvcc", "nvvm", dso_dir) - dso_path = os.path.join(dso_dir, dso_path) - if os.path.exists(dso_path): - return str(Path(dso_path).parent) - - -def _get_libdevice_paths(): - by, libdir = _get_libdevice_path_decision() - if by == "NVIDIA NVCC Wheel": - # The NVVM path is a directory, not a file - out = os.path.join(libdir, "libdevice.10.bc") - else: - # Search for pattern - pat = r"libdevice(\.\d+)*\.bc$" - candidates = find_file(re.compile(pat), libdir) - # Keep only the max (most recent version) of the bitcode files. 
- out = max(candidates, default=None) - return _env_path_tuple(by, out) - - -def _cudalib_path(): - if IS_WIN32: - return "bin" - else: - return "lib64" - - -def _cuda_home_static_cudalib_path(): - if IS_WIN32: - return ("lib", "x64") - else: - return ("lib64",) - - -def _get_cudalib_dir_path_decision(): - options = [ - ("Conda environment", get_conda_ctk()), - ("Conda environment (NVIDIA package)", get_nvidia_cudalib_ctk()), - ("CUDA_HOME", get_cuda_home(_cudalib_path())), - ("System", get_system_ctk(_cudalib_path())), - ] - by, libdir = _find_valid_path(options) - return by, libdir - - -def _get_static_cudalib_dir_path_decision(): - options = [ - ("Conda environment", get_conda_ctk()), - ("Conda environment (NVIDIA package)", get_nvidia_static_cudalib_ctk()), - ("CUDA_HOME", get_cuda_home(*_cuda_home_static_cudalib_path())), - ("System", get_system_ctk(_cudalib_path())), - ] - by, libdir = _find_valid_path(options) - return by, libdir - - -def _get_cudalib_dir(): - by, libdir = _get_cudalib_dir_path_decision() - return _env_path_tuple(by, libdir) - - -def _get_static_cudalib_dir(): - by, libdir = _get_static_cudalib_dir_path_decision() - return _env_path_tuple(by, libdir) - - -def get_system_ctk(*subdirs): - """Return path to system-wide cudatoolkit; or, None if it doesn't exist.""" - # Linux? - if sys.platform.startswith("linux"): - # Is cuda alias to /usr/local/cuda? - # We are intentionally not getting versioned cuda installation. 
- base = "/usr/local/cuda" - if os.path.exists(base): - return os.path.join(base, *subdirs) - - -def get_conda_ctk(): - """Return path to directory containing the shared libraries of cudatoolkit.""" - is_conda_env = os.path.exists(os.path.join(sys.prefix, "conda-meta")) - if not is_conda_env: - return - # Assume the existence of NVVM to imply cudatoolkit installed - paths = find_lib("nvvm") - if not paths: - return - # Use the directory name of the max path - return os.path.dirname(max(paths)) - - -def get_nvidia_nvvm_ctk(): - """Return path to directory containing the NVVM shared library.""" - is_conda_env = os.path.exists(os.path.join(sys.prefix, "conda-meta")) - if not is_conda_env: - return - - # Assume the existence of NVVM in the conda env implies that a CUDA toolkit - # conda package is installed. - - # First, try the location used on Linux and the Windows 11.x packages - libdir = os.path.join(sys.prefix, "nvvm", _cudalib_path()) - if not os.path.exists(libdir) or not os.path.isdir(libdir): - # If that fails, try the location used for Windows 12.x packages - libdir = os.path.join(sys.prefix, "Library", "nvvm", _cudalib_path()) - if not os.path.exists(libdir) or not os.path.isdir(libdir): - # If that doesn't exist either, assume we don't have the NVIDIA - # conda package - return - - paths = find_lib("nvvm", libdir=libdir) - if not paths: - return - # Use the directory name of the max path - return os.path.dirname(max(paths)) - - -def get_nvidia_libdevice_ctk(): - """Return path to directory containing the libdevice library.""" - nvvm_ctk = get_nvidia_nvvm_ctk() - if not nvvm_ctk: - return - nvvm_dir = os.path.dirname(nvvm_ctk) - return os.path.join(nvvm_dir, "libdevice") - - -def get_nvidia_cudalib_ctk(): - """Return path to directory containing the shared libraries of cudatoolkit.""" - nvvm_ctk = get_nvidia_nvvm_ctk() - if not nvvm_ctk: - return - env_dir = os.path.dirname(os.path.dirname(nvvm_ctk)) - subdir = "bin" if IS_WIN32 else "lib" - return 
os.path.join(env_dir, subdir) - - -def get_nvidia_static_cudalib_ctk(): - """Return path to directory containing the static libraries of cudatoolkit.""" - nvvm_ctk = get_nvidia_nvvm_ctk() - if not nvvm_ctk: - return - - if IS_WIN32 and ("Library" not in nvvm_ctk): # noqa: SIM108 - # Location specific to CUDA 11.x packages on Windows - dirs = ("Lib", "x64") - else: - # Linux, or Windows with CUDA 12.x packages - dirs = ("lib",) - - env_dir = os.path.dirname(os.path.dirname(nvvm_ctk)) - return os.path.join(env_dir, *dirs) - - -def get_cuda_home(*subdirs): - """Get paths of CUDA_HOME. - If *subdirs* are the subdirectory name to be appended in the resulting - path. - """ - cuda_home = os.environ.get("CUDA_HOME") - if cuda_home is None: - # Try Windows CUDA installation without Anaconda - cuda_home = os.environ.get("CUDA_PATH") - if cuda_home is not None: - return os.path.join(cuda_home, *subdirs) - - -def _get_nvvm_path(): - by, path = _get_nvvm_path_decision() - if by == "NVIDIA NVCC Wheel": - # The NVVM path is a directory, not a file - path = os.path.join(path, "libnvvm.so") - else: - candidates = find_lib("nvvm", path) - path = max(candidates) if candidates else None - return _env_path_tuple(by, path) - - -def get_cuda_paths(): - """Returns a dictionary mapping component names to a 2-tuple - of (source_variable, info). - - The returned dictionary will have the following keys and infos: - - "nvvm": file_path - - "libdevice": List[Tuple[arch, file_path]] - - "cudalib_dir": directory_path - - Note: The result of the function is cached. 
- """ - # Check cache - if hasattr(get_cuda_paths, "_cached_result"): - return get_cuda_paths._cached_result - else: - # Not in cache - d = { - "nvvm": _get_nvvm_path(), - "libdevice": _get_libdevice_paths(), - "cudalib_dir": _get_cudalib_dir(), - "static_cudalib_dir": _get_static_cudalib_dir(), - "include_dir": _get_include_dir(), - } - # Cache result - get_cuda_paths._cached_result = d - return d - - -def get_debian_pkg_libdevice(): - """ - Return the Debian NVIDIA Maintainers-packaged libdevice location, if it - exists. - """ - pkg_libdevice_location = "/usr/lib/nvidia-cuda-toolkit/libdevice" - if not os.path.exists(pkg_libdevice_location): - return None - return pkg_libdevice_location - - -def get_libdevice_wheel(): - nvvm_path = _get_nvvm_wheel() - if nvvm_path is None: - return None - nvvm_path = Path(nvvm_path) - libdevice_path = nvvm_path.parent / "libdevice" - - return str(libdevice_path) - - -def get_current_cuda_target_name(): - """Determine conda's CTK target folder based on system and machine arch. - - CTK's conda package delivers headers based on its architecture type. For example, - `x86_64` machine places header under `$CONDA_PREFIX/targets/x86_64-linux`, and - `aarch64` places under `$CONDA_PREFIX/targets/sbsa-linux`. Read more about the - nuances at cudart's conda feedstock: - https://github.com/conda-forge/cuda-cudart-feedstock/blob/main/recipe/meta.yaml#L8-L11 # noqa: E501 - """ - system = platform.system() - machine = platform.machine() - - if system == "Linux": - arch_to_targets = {"x86_64": "x86_64-linux", "aarch64": "sbsa-linux"} - elif system == "Windows": - arch_to_targets = { - "AMD64": "x64", - } - else: - arch_to_targets = {} - - return arch_to_targets.get(machine, None) - - -def get_conda_include_dir(): - """ - Return the include directory in the current conda environment, if one - is active and it exists. 
- """ - is_conda_env = os.path.exists(os.path.join(sys.prefix, "conda-meta")) - if not is_conda_env: - return - - if platform.system() == "Windows": - include_dir = os.path.join(sys.prefix, "Library", "include") - elif target_name := get_current_cuda_target_name(): - include_dir = os.path.join(sys.prefix, "targets", target_name, "include") - else: - # A fallback when target cannot determined - # though usually it shouldn't. - include_dir = os.path.join(sys.prefix, "include") - - if ( - os.path.exists(include_dir) - and os.path.isdir(include_dir) - and os.path.exists(os.path.join(include_dir, "cuda_device_runtime_api.h")) - ): - return include_dir - return - - -def _get_include_dir(): - """Find the root include directory.""" - options = [ - ("Conda environment (NVIDIA package)", get_conda_include_dir()), - ("CUDA_INCLUDE_PATH Config Entry", config_CUDA_INCLUDE_PATH), - # TODO: add others - ] - by, include_dir = _find_valid_path(options) - return _env_path_tuple(by, include_dir) +# Copyright 2024-2025 NVIDIA Corporation. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +from cuda.bindings._path_finder_utils.cuda_paths import ( + get_conda_ctk, + get_conda_include_dir, + get_cuda_home, + get_cuda_paths, + get_current_cuda_target_name, + get_debian_pkg_libdevice, + get_libdevice_wheel, + get_nvidia_cudalib_ctk, + get_nvidia_libdevice_ctk, + get_nvidia_nvvm_ctk, + get_nvidia_static_cudalib_ctk, + get_system_ctk, +) + +__all__ = [ + "get_conda_ctk", + "get_conda_include_dir", + "get_cuda_home", + "get_cuda_paths", + "get_current_cuda_target_name", + "get_debian_pkg_libdevice", + "get_libdevice_wheel", + "get_nvidia_cudalib_ctk", + "get_nvidia_libdevice_ctk", + "get_nvidia_nvvm_ctk", + "get_nvidia_static_cudalib_ctk", + "get_system_ctk", +] From 5bd23ec42094d20673de1e37b174e028696683c0 Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Wed, 26 Mar 2025 14:18:26 -0700 Subject: [PATCH 24/69] Add find_nvidia_dynamic_library() and use from nvjitlink_linux.pyx, nvvm_linux.pyx --- .../bindings/_internal/nvjitlink_linux.pyx | 51 ++----------- .../cuda/bindings/_internal/nvvm_linux.pyx | 15 ++-- .../find_nvidia_dynamic_library.py | 76 +++++++++++++++++++ .../find_nvidia_lib_dirs.py | 30 ++++++++ cuda_bindings/cuda/bindings/path_finder.py | 2 + cuda_bindings/tests/path_finder.py | 3 + 6 files changed, 122 insertions(+), 55 deletions(-) create mode 100644 cuda_bindings/cuda/bindings/_path_finder_utils/find_nvidia_dynamic_library.py create mode 100644 cuda_bindings/cuda/bindings/_path_finder_utils/find_nvidia_lib_dirs.py diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx index 6c31699c9..6e9de8ee5 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx @@ -8,8 +8,6 @@ import os from libc.stdint cimport intptr_t -from .utils cimport get_nvjitlink_dso_version_suffix - from .utils import FunctionNotFoundError, NotSupportedError from cuda.bindings import path_finder @@ -57,49 +55,12 @@ cdef void* __nvJitLinkVersion = NULL cdef void* load_library(const int driver_ver) except* with gil: - # TODO(rwgk): Move the version check here. 
- # Intentionally ignoring returned value: - get_nvjitlink_dso_version_suffix(driver_ver) - - so_basename = "libnvJitLink.so" - cdef void* handle = NULL; - paths = path_finder.get_cuda_paths() - paths_cudalib_dir = paths["cudalib_dir"] - if not paths_cudalib_dir: - raise RuntimeError("Failure obtaining paths_cudalib_dir") - if not paths_cudalib_dir.info: - raise RuntimeError("Failure obtaining paths_cudalib_dir.info") - primary_so_dir = paths_cudalib_dir.info + "/" - candidate_so_dirs = [primary_so_dir] - libs = ["/lib/", "/lib64/"] - for _ in range(2): - alt_dir = libs[0].join(primary_so_dir.rsplit(libs[1], 1)) - if alt_dir not in candidate_so_dirs: - candidate_so_dirs.append(alt_dir) - libs.reverse() - candidate_so_names = [ - so_dirname + so_basename - for so_dirname in candidate_so_dirs] - error_messages = [] - for so_name in candidate_so_names: - if not os.path.exists(so_name): - error_messages.append(f"No such file: {so_name}") - else: - handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL) - if handle != NULL: - return handle - err_msg = dlerror().decode(errors="backslashreplace") - error_messages.append(f"Failed to dlopen {so_name}: {err_msg}") - attachment = [] - for so_dirname in candidate_so_dirs: - attachment.append(f" listdir({repr(so_dirname)}):") - if not os.path.isdir(so_dirname): - attachment.append(" DIRECTORY DOES NOT EXIST") - else: - for node in sorted(os.listdir(so_dirname)): - attachment.append(f" {node}") - attachment = "\n".join(attachment) - raise RuntimeError(f"Unable to load {so_basename} from: {', '.join(error_messages)}\n{attachment}") + so_name = path_finder.find_nvidia_dynamic_library("nvJitLink") + cdef void* handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL) + if handle != NULL: + return handle + err_msg = dlerror().decode(errors="backslashreplace") + raise RuntimeError(f"Failed to dlopen {so_name}: {err_msg}") cdef int _check_or_init_nvjitlink() except -1 nogil: diff --git 
a/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx index 3abf56e86..4d3135321 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx @@ -51,17 +51,12 @@ cdef void* __nvvmGetProgramLog = NULL cdef void* load_library(const int driver_ver) except* with gil: - cdef void* handle = NULL; - paths = path_finder.get_cuda_paths() - paths_nvvm = paths["nvvm"] - if paths_nvvm: - so_name = paths_nvvm.info - handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL) - if handle == NULL: - err_msg = dlerror() - raise RuntimeError(f'Failed to dlopen {so_name} ({err_msg.decode()})') + so_name = path_finder.find_nvidia_dynamic_library("nvvm") + cdef void* handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL) + if handle != NULL: return handle - raise RuntimeError('Unable to locate libnvvm.so') + err_msg = dlerror().decode(errors="backslashreplace") + raise RuntimeError(f"Failed to dlopen {so_name}: {err_msg}") cdef int _check_or_init_nvvm() except -1 nogil: diff --git a/cuda_bindings/cuda/bindings/_path_finder_utils/find_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder_utils/find_nvidia_dynamic_library.py new file mode 100644 index 000000000..86d2198d7 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_path_finder_utils/find_nvidia_dynamic_library.py @@ -0,0 +1,76 @@ +# Copyright 2024-2025 NVIDIA Corporation. All rights reserved. 
+# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +import functools +import os + +from .cuda_paths import get_cuda_paths +from .find_nvidia_lib_dirs import find_nvidia_lib_dirs + + +def _find_using_nvidia_lib_dirs(so_basename, error_messages, attachments): + for lib_dir in find_nvidia_lib_dirs(): + so_name = os.path.join(lib_dir, so_basename) + if os.path.isfile(so_name): + return so_name + error_messages.append(f"No such file: {so_name}") + for lib_dir in find_nvidia_lib_dirs(): + attachments.append(f" listdir({repr(lib_dir)}):") + for node in sorted(os.listdir(lib_dir)): + attachments.append(f" {node}") + return None + + +def _get_cuda_paths_info(key, error_messages): + env_path_tuple = get_cuda_paths()[key] + if not env_path_tuple: + error_messages.append(f'Failure obtaining get_cuda_paths()["{key}"]') + return None + if not env_path_tuple.info: + error_messages.append(f'Failure obtaining get_cuda_paths()["{key}"].info') + return None + return env_path_tuple.info + + +def _find_using_lib_dir(so_basename, error_messages, attachments): + lib_dir = _get_cuda_paths_info("cudalib_dir", error_messages) + primary_so_dir = lib_dir + "/" + candidate_so_dirs = [primary_so_dir] + libs = ["/lib/", "/lib64/"] + for _ in range(2): + alt_dir = libs[0].join(primary_so_dir.rsplit(libs[1], 1)) + if alt_dir not in candidate_so_dirs: + candidate_so_dirs.append(alt_dir) + libs.reverse() + candidate_so_names = [so_dirname + so_basename for so_dirname in candidate_so_dirs] + error_messages = [] + for so_name in candidate_so_names: + if os.path.isfile(so_name): + return so_name + error_messages.append(f"No such file: {so_name}") + for so_dirname in candidate_so_dirs: + attachments.append(f" listdir({repr(so_dirname)}):") + if not os.path.isdir(so_dirname): + attachments.append(" DIRECTORY DOES NOT EXIST") + else: + for node in sorted(os.listdir(so_dirname)): + attachments.append(f" {node}") + return None + + +@functools.cache +def 
find_nvidia_dynamic_library(libbasename): + so_basename = f"lib{libbasename}.so" + error_messages = [] + attachments = [] + so_name = _find_using_nvidia_lib_dirs(so_basename, error_messages, attachments) + if so_name is None: + if libbasename == "nvvm": + so_name = _get_cuda_paths_info("nvvm", error_messages) + else: + so_name = _find_using_lib_dir(so_basename, error_messages, attachments) + if so_name is None: + attachments = "\n".join(attachments) + raise RuntimeError(f"Unable to load {so_basename} from: {', '.join(error_messages)}\n{attachments}") + return so_name diff --git a/cuda_bindings/cuda/bindings/_path_finder_utils/find_nvidia_lib_dirs.py b/cuda_bindings/cuda/bindings/_path_finder_utils/find_nvidia_lib_dirs.py new file mode 100644 index 000000000..2e1cfc78d --- /dev/null +++ b/cuda_bindings/cuda/bindings/_path_finder_utils/find_nvidia_lib_dirs.py @@ -0,0 +1,30 @@ +# Copyright 2024-2025 NVIDIA Corporation. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +import functools +import os +import sys + + +@functools.cache +def _find_nvidia_lib_dirs(sys_path): + results = [] + for base in sys_path: + nvidia_base = os.path.join(base, "nvidia") + if not os.path.isdir(nvidia_base): + continue + try: + subdirs = os.listdir(nvidia_base) + except OSError: + continue + for sub in subdirs: + sub_path = os.path.join(nvidia_base, sub) + lib_path = os.path.join(sub_path, "lib") + if os.path.isdir(lib_path): + results.append(lib_path) + return results + + +def find_nvidia_lib_dirs(): + return _find_nvidia_lib_dirs(tuple(sys.path)) diff --git a/cuda_bindings/cuda/bindings/path_finder.py b/cuda_bindings/cuda/bindings/path_finder.py index 5f1c46814..fa45ec0d1 100644 --- a/cuda_bindings/cuda/bindings/path_finder.py +++ b/cuda_bindings/cuda/bindings/path_finder.py @@ -16,8 +16,10 @@ get_nvidia_static_cudalib_ctk, get_system_ctk, ) +from cuda.bindings._path_finder_utils.find_nvidia_dynamic_library import find_nvidia_dynamic_library __all__ 
= [ + "find_nvidia_dynamic_library", "get_conda_ctk", "get_conda_include_dir", "get_cuda_home", diff --git a/cuda_bindings/tests/path_finder.py b/cuda_bindings/tests/path_finder.py index 75abed477..e9245a5be 100644 --- a/cuda_bindings/tests/path_finder.py +++ b/cuda_bindings/tests/path_finder.py @@ -4,3 +4,6 @@ for k, v in paths.items(): print(f"{k}: {v}", flush=True) + +print(path_finder.find_nvidia_dynamic_library("nvvm")) +print(path_finder.find_nvidia_dynamic_library("nvJitLink")) From ec02838a8803a75ad0878bf1ee7f68f1f87abe73 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Wed, 26 Mar 2025 19:34:45 -0700 Subject: [PATCH 25/69] Fix oversight in _find_using_lib_dir() --- .../bindings/_path_finder_utils/find_nvidia_dynamic_library.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cuda_bindings/cuda/bindings/_path_finder_utils/find_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder_utils/find_nvidia_dynamic_library.py index 86d2198d7..885f78388 100644 --- a/cuda_bindings/cuda/bindings/_path_finder_utils/find_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder_utils/find_nvidia_dynamic_library.py @@ -35,6 +35,8 @@ def _get_cuda_paths_info(key, error_messages): def _find_using_lib_dir(so_basename, error_messages, attachments): lib_dir = _get_cuda_paths_info("cudalib_dir", error_messages) + if lib_dir is None: + return None primary_so_dir = lib_dir + "/" candidate_so_dirs = [primary_so_dir] libs = ["/lib/", "/lib64/"] From 5d8b58b844a1619843c7f1c60409d33649f15b2e Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Thu, 27 Mar 2025 08:47:45 -0700 Subject: [PATCH 26/69] Also look for versioned library in _find_using_nvidia_lib_dirs() --- .../find_nvidia_dynamic_library.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_path_finder_utils/find_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder_utils/find_nvidia_dynamic_library.py index 885f78388..d53195f08 100644 --- a/cuda_bindings/cuda/bindings/_path_finder_utils/find_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder_utils/find_nvidia_dynamic_library.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE import functools +import glob import os from .cuda_paths import get_cuda_paths @@ -10,11 +11,19 @@ def _find_using_nvidia_lib_dirs(so_basename, error_messages, attachments): + so_wild = so_basename + "*" for lib_dir in find_nvidia_lib_dirs(): + # First look for an exact match so_name = os.path.join(lib_dir, so_basename) if os.path.isfile(so_name): return so_name - error_messages.append(f"No such file: {so_name}") + # Look for a versioned library + # Using sort here mainly to make the result deterministic. 
+ for node in sorted(glob.glob(so_wild, root_dir=lib_dir)): + so_name = os.path.join(lib_dir, node) + if os.path.isfile(so_name): + return so_name + error_messages.append(f"No such file: {so_wild}") for lib_dir in find_nvidia_lib_dirs(): attachments.append(f" listdir({repr(lib_dir)}):") for node in sorted(os.listdir(lib_dir)): @@ -33,7 +42,7 @@ def _get_cuda_paths_info(key, error_messages): return env_path_tuple.info -def _find_using_lib_dir(so_basename, error_messages, attachments): +def _find_using_cudalib_dir(so_basename, error_messages, attachments): lib_dir = _get_cuda_paths_info("cudalib_dir", error_messages) if lib_dir is None: return None @@ -71,8 +80,8 @@ def find_nvidia_dynamic_library(libbasename): if libbasename == "nvvm": so_name = _get_cuda_paths_info("nvvm", error_messages) else: - so_name = _find_using_lib_dir(so_basename, error_messages, attachments) + so_name = _find_using_cudalib_dir(so_basename, error_messages, attachments) if so_name is None: attachments = "\n".join(attachments) - raise RuntimeError(f"Unable to load {so_basename} from: {', '.join(error_messages)}\n{attachments}") + raise RuntimeError(f"Failure finding {so_basename}: {', '.join(error_messages)}\n{attachments}") return so_name From 7fd7cba88e540589c32ddd661d1fa4e156c553d1 Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Thu, 27 Mar 2025 09:19:17 -0700 Subject: [PATCH 27/69] glob.glob() Python 3.9 compatibility --- .../bindings/_path_finder_utils/find_nvidia_dynamic_library.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_bindings/cuda/bindings/_path_finder_utils/find_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder_utils/find_nvidia_dynamic_library.py index d53195f08..7f98561bd 100644 --- a/cuda_bindings/cuda/bindings/_path_finder_utils/find_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder_utils/find_nvidia_dynamic_library.py @@ -19,7 +19,7 @@ def _find_using_nvidia_lib_dirs(so_basename, error_messages, attachments): return so_name # Look for a versioned library # Using sort here mainly to make the result deterministic. - for node in sorted(glob.glob(so_wild, root_dir=lib_dir)): + for node in sorted(glob.glob(os.path.join(lib_dir, so_wild))): so_name = os.path.join(lib_dir, node) if os.path.isfile(so_name): return so_name From 269b7f8bfd3d43897d295509f1c7703008a6f1e8 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 31 Mar 2025 08:28:56 -0700 Subject: [PATCH 28/69] Reduce build-and-test.yml to Windows-only, Python 3.12 only. --- .github/workflows/build-and-test.yml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 288a5624b..f8a7f7c06 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -20,15 +20,9 @@ jobs: fail-fast: false matrix: host-platform: - - linux-64 - - linux-aarch64 - win-64 python-version: - - "3.13" - "3.12" - - "3.11" - - "3.10" - - "3.9" cuda-version: # Note: this is for build-time only. 
- "12.8.0" @@ -205,6 +199,7 @@ jobs: echo "CUDA_VERSION=${{ matrix.cuda-version }}" >> $GITHUB_OUTPUT test-linux: + if: false # jobs disabled strategy: fail-fast: false # TODO: add driver version here From b0db24f9cfa3847e6a3e11c00f0225c7c7ef431e Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 31 Mar 2025 08:48:29 -0700 Subject: [PATCH 29/69] Comment out `if: ${{ github.repository_owner == nvidia }}` --- .github/workflows/build-and-test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index f8a7f7c06..b5c32f492 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -27,7 +27,7 @@ jobs: # Note: this is for build-time only. - "12.8.0" name: Build (${{ matrix.host-platform }}, Python ${{ matrix.python-version }}, CUDA ${{ matrix.cuda-version }}) - if: ${{ github.repository_owner == 'nvidia' }} + # if: ${{ github.repository_owner == 'nvidia' }} permissions: contents: read # This is required for actions/checkout runs-on: ${{ (matrix.host-platform == 'linux-64' && 'linux-amd64-cpu8') || @@ -269,7 +269,7 @@ jobs: runner: - default name: Test (${{ matrix.host-platform }}, Python ${{ matrix.python-version }}, CUDA ${{ matrix.cuda-version }}, Runner ${{ matrix.runner }}, ${{ (matrix.local-ctk == '1' && 'local CTK') || 'CTK wheels' }}) - if: ${{ github.repository_owner == 'nvidia' }} + # if: ${{ github.repository_owner == 'nvidia' }} permissions: contents: read # This is required for actions/checkout needs: From 2a7780cca42065cc866100f0d2aa387d22b2ea8c Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 31 Mar 2025 08:50:28 -0700 Subject: [PATCH 30/69] Revert "Comment out `if: ${{ github.repository_owner == nvidia }}`" This reverts commit b0db24f9cfa3847e6a3e11c00f0225c7c7ef431e. 
--- .github/workflows/build-and-test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index b5c32f492..f8a7f7c06 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -27,7 +27,7 @@ jobs: # Note: this is for build-time only. - "12.8.0" name: Build (${{ matrix.host-platform }}, Python ${{ matrix.python-version }}, CUDA ${{ matrix.cuda-version }}) - # if: ${{ github.repository_owner == 'nvidia' }} + if: ${{ github.repository_owner == 'nvidia' }} permissions: contents: read # This is required for actions/checkout runs-on: ${{ (matrix.host-platform == 'linux-64' && 'linux-amd64-cpu8') || @@ -269,7 +269,7 @@ jobs: runner: - default name: Test (${{ matrix.host-platform }}, Python ${{ matrix.python-version }}, CUDA ${{ matrix.cuda-version }}, Runner ${{ matrix.runner }}, ${{ (matrix.local-ctk == '1' && 'local CTK') || 'CTK wheels' }}) - # if: ${{ github.repository_owner == 'nvidia' }} + if: ${{ github.repository_owner == 'nvidia' }} permissions: contents: read # This is required for actions/checkout needs: From c2136ea2220ae83193b4d3ac73f169b96edae002 Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Mon, 31 Mar 2025 08:51:38 -0700 Subject: [PATCH 31/69] Add back `linux-64` `host-platform` --- .github/workflows/build-and-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index f8a7f7c06..38b8408fe 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -20,6 +20,7 @@ jobs: fail-fast: false matrix: host-platform: + - linux-64 - win-64 python-version: - "3.12" @@ -199,7 +200,6 @@ jobs: echo "CUDA_VERSION=${{ matrix.cuda-version }}" >> $GITHUB_OUTPUT test-linux: - if: false # jobs disabled strategy: fail-fast: false # TODO: add driver version here From 1bb71513fea05054779312caac054a09b212b8a7 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 31 Mar 2025 09:53:16 -0700 Subject: [PATCH 32/69] Rewrite load_library() in nvjitlink_windows.pyx to use path_finder.find_nvidia_dynamic_library() --- .../bindings/_internal/nvjitlink_windows.pyx | 77 +++++++------------ 1 file changed, 26 insertions(+), 51 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx index b8ab705d8..e25da2386 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx @@ -6,12 +6,9 @@ from libc.stdint cimport intptr_t -from .utils cimport get_nvjitlink_dso_version_suffix - from .utils import FunctionNotFoundError, NotSupportedError -import os -import site +from cuda.bindings import path_finder import win32api @@ -42,54 +39,32 @@ cdef void* __nvJitLinkGetInfoLog = NULL cdef void* __nvJitLinkVersion = NULL -cdef inline list get_site_packages(): - return [site.getusersitepackages()] + site.getsitepackages() - - cdef load_library(const int driver_ver): - handle = 0 - - for suffix in get_nvjitlink_dso_version_suffix(driver_ver): - if len(suffix) == 0: - 
continue - dll_name = f"nvJitLink_{suffix}0_0.dll" - - # First check if the DLL has been loaded by 3rd parties - try: - handle = win32api.GetModuleHandle(dll_name) - except: - pass - else: - break - - # Next, check if DLLs are installed via pip - for sp in get_site_packages(): - mod_path = os.path.join(sp, "nvidia", "nvJitLink", "bin") - if not os.path.isdir(mod_path): - continue - os.add_dll_directory(mod_path) - try: - handle = win32api.LoadLibraryEx( - # Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path... - os.path.join(mod_path, dll_name), - 0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR) - except: - pass - else: - break - - # Finally, try default search - try: - handle = win32api.LoadLibrary(dll_name) - except: - pass - else: - break - else: - raise RuntimeError('Failed to load nvJitLink') - - assert handle != 0 - return handle + dll_name = path_finder.find_nvidia_dynamic_library("nvJitLink") + + errors = [f"Failed to load {dll_name}", "Exceptions encountered:"] + + # First check if the DLL has been loaded by 3rd parties + try: + return win32api.GetModuleHandle(dll_name) + except BaseException as e: + errors.append(f"{type(e)}: {str(e)}") + + try: + return win32api.LoadLibraryEx( + # Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path... + dll_name, + 0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR) + except BaseException as e: + errors.append(f"{type(e)}: {str(e)}") + + # Finally, try default search + try: + return win32api.LoadLibrary(dll_name) + except BaseException as e: + errors.append(f"{type(e)}: {str(e)}") + + raise RuntimeError("\n".join(errors)) cdef int _check_or_init_nvjitlink() except -1 nogil: From 00466f8ccef97de4caef502f239d0621dbfb1000 Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Mon, 31 Mar 2025 11:07:39 -0700 Subject: [PATCH 33/69] Revert "Rewrite load_library() in nvjitlink_windows.pyx to use path_finder.find_nvidia_dynamic_library()" This reverts commit 1bb71513fea05054779312caac054a09b212b8a7. --- .../bindings/_internal/nvjitlink_windows.pyx | 77 ++++++++++++------- 1 file changed, 51 insertions(+), 26 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx index e25da2386..b8ab705d8 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx @@ -6,9 +6,12 @@ from libc.stdint cimport intptr_t +from .utils cimport get_nvjitlink_dso_version_suffix + from .utils import FunctionNotFoundError, NotSupportedError -from cuda.bindings import path_finder +import os +import site import win32api @@ -39,32 +42,54 @@ cdef void* __nvJitLinkGetInfoLog = NULL cdef void* __nvJitLinkVersion = NULL +cdef inline list get_site_packages(): + return [site.getusersitepackages()] + site.getsitepackages() + + cdef load_library(const int driver_ver): - dll_name = path_finder.find_nvidia_dynamic_library("nvJitLink") - - errors = [f"Failed to load {dll_name}", "Exceptions encountered:"] - - # First check if the DLL has been loaded by 3rd parties - try: - return win32api.GetModuleHandle(dll_name) - except BaseException as e: - errors.append(f"{type(e)}: {str(e)}") - - try: - return win32api.LoadLibraryEx( - # Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path... 
- dll_name, - 0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR) - except BaseException as e: - errors.append(f"{type(e)}: {str(e)}") - - # Finally, try default search - try: - return win32api.LoadLibrary(dll_name) - except BaseException as e: - errors.append(f"{type(e)}: {str(e)}") - - raise RuntimeError("\n".join(errors)) + handle = 0 + + for suffix in get_nvjitlink_dso_version_suffix(driver_ver): + if len(suffix) == 0: + continue + dll_name = f"nvJitLink_{suffix}0_0.dll" + + # First check if the DLL has been loaded by 3rd parties + try: + handle = win32api.GetModuleHandle(dll_name) + except: + pass + else: + break + + # Next, check if DLLs are installed via pip + for sp in get_site_packages(): + mod_path = os.path.join(sp, "nvidia", "nvJitLink", "bin") + if not os.path.isdir(mod_path): + continue + os.add_dll_directory(mod_path) + try: + handle = win32api.LoadLibraryEx( + # Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path... + os.path.join(mod_path, dll_name), + 0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR) + except: + pass + else: + break + + # Finally, try default search + try: + handle = win32api.LoadLibrary(dll_name) + except: + pass + else: + break + else: + raise RuntimeError('Failed to load nvJitLink') + + assert handle != 0 + return handle cdef int _check_or_init_nvjitlink() except -1 nogil: From 389cd5a1b3efeac281e64a392b4e00781ad0a716 Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Mon, 31 Mar 2025 11:59:05 -0700 Subject: [PATCH 34/69] Add _inspect_environment() in find_nvidia_dynamic_library.py, call from nvjitlink_windows.pyx, nvvm_windows.pyx --- .../bindings/_internal/nvjitlink_windows.pyx | 4 +++ .../cuda/bindings/_internal/nvvm_windows.pyx | 4 +++ .../find_nvidia_dynamic_library.py | 36 +++++++++++++++++-- cuda_bindings/tests/path_finder.py | 4 +-- 4 files changed, 44 insertions(+), 4 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx index b8ab705d8..579393169 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx @@ -10,6 +10,8 @@ from .utils cimport get_nvjitlink_dso_version_suffix from .utils import FunctionNotFoundError, NotSupportedError +from cuda.bindings import path_finder + import os import site @@ -88,6 +90,8 @@ cdef load_library(const int driver_ver): else: raise RuntimeError('Failed to load nvJitLink') + path_finder.find_nvidia_dynamic_library("nvJitLink", handle) + assert handle != 0 return handle diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx index b8e679547..dc6e37610 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx @@ -10,6 +10,8 @@ from .utils cimport get_nvvm_dso_version_suffix from .utils import FunctionNotFoundError, NotSupportedError +from cuda.bindings import path_finder + import os import site @@ -86,6 +88,8 @@ cdef load_library(const int driver_ver): else: raise RuntimeError('Failed to load nvvm') + path_finder.find_nvidia_dynamic_library("nvvm", handle) + assert handle != 0 return handle diff --git a/cuda_bindings/cuda/bindings/_path_finder_utils/find_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder_utils/find_nvidia_dynamic_library.py 
index 7f98561bd..9a849ff75 100644 --- a/cuda_bindings/cuda/bindings/_path_finder_utils/find_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder_utils/find_nvidia_dynamic_library.py @@ -5,8 +5,10 @@ import functools import glob import os +import sys +import traceback -from .cuda_paths import get_cuda_paths +from .cuda_paths import IS_WIN32, get_cuda_paths from .find_nvidia_lib_dirs import find_nvidia_lib_dirs @@ -70,8 +72,38 @@ def _find_using_cudalib_dir(so_basename, error_messages, attachments): return None +def _inspect_environment(libbasename, handle): + if IS_WIN32: + import win32api + + dll_path = win32api.GetModuleFileName(handle) + print(f"LOOOK {libbasename=} Loaded DLL path:", dll_path) + error_messages = [] + lib_dir = _get_cuda_paths_info("cudalib_dir", error_messages) + if lib_dir is None: + print(f"LOOOK {libbasename=} {error_messages=}") + elif not os.path.isdir(lib_dir): + print(f"LOOOK {libbasename=} not isdir({lib_dir=})") + else: + print(f"LOOOK {libbasename=} cudalib_dir {lib_dir=}") + for node in sorted(os.listdir(lib_dir)): + print(f"LOOOK {node}") + for lib_dir in find_nvidia_lib_dirs(): + print(f"LOOOK {libbasename=} NVIDIA {lib_dir=}") + for node in sorted(os.listdir(lib_dir)): + print(f"LOOOK {node}") + + @functools.cache -def find_nvidia_dynamic_library(libbasename): +def find_nvidia_dynamic_library(libbasename, handle=None): + if handle is not None: + try: + _inspect_environment(libbasename, handle) + except Exception as e: + print("LOOOK EXCEPTION:") + traceback.print_exception(type(e), e, e.__traceback__, file=sys.stdout) + if IS_WIN32: + return so_basename = f"lib{libbasename}.so" error_messages = [] attachments = [] diff --git a/cuda_bindings/tests/path_finder.py b/cuda_bindings/tests/path_finder.py index e9245a5be..7f6b0e8d9 100644 --- a/cuda_bindings/tests/path_finder.py +++ b/cuda_bindings/tests/path_finder.py @@ -5,5 +5,5 @@ for k, v in paths.items(): print(f"{k}: {v}", flush=True) 
-print(path_finder.find_nvidia_dynamic_library("nvvm")) -print(path_finder.find_nvidia_dynamic_library("nvJitLink")) +print(path_finder.find_nvidia_dynamic_library("nvvm", "TEST")) +print(path_finder.find_nvidia_dynamic_library("nvJitLink", "TEST")) From 1112fce360fbd07a6a4f8458036dc68a3e690854 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 31 Mar 2025 17:16:39 -0700 Subject: [PATCH 35/69] Add & use _find_dll_using_nvidia_bin_dirs(), _find_dll_using_cudalib_dir() --- .../find_nvidia_dynamic_library.py | 118 ++++++++++-------- .../find_nvidia_lib_dirs.py | 30 ----- .../sys_path_find_sub_dirs.py | 40 ++++++ cuda_bindings/tests/path_finder.py | 4 +- .../tests/test_sys_path_find_sub_dirs.py | 72 +++++++++++ 5 files changed, 183 insertions(+), 81 deletions(-) delete mode 100644 cuda_bindings/cuda/bindings/_path_finder_utils/find_nvidia_lib_dirs.py create mode 100644 cuda_bindings/cuda/bindings/_path_finder_utils/sys_path_find_sub_dirs.py create mode 100644 cuda_bindings/tests/test_sys_path_find_sub_dirs.py diff --git a/cuda_bindings/cuda/bindings/_path_finder_utils/find_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder_utils/find_nvidia_dynamic_library.py index 9a849ff75..44c0e78f5 100644 --- a/cuda_bindings/cuda/bindings/_path_finder_utils/find_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder_utils/find_nvidia_dynamic_library.py @@ -5,31 +5,52 @@ import functools import glob import os -import sys -import traceback from .cuda_paths import IS_WIN32, get_cuda_paths -from .find_nvidia_lib_dirs import find_nvidia_lib_dirs +from .sys_path_find_sub_dirs import sys_path_find_sub_dirs -def _find_using_nvidia_lib_dirs(so_basename, error_messages, attachments): - so_wild = so_basename + "*" - for lib_dir in find_nvidia_lib_dirs(): +def _no_such_file_in_sub_dirs(sub_dirs, file_wild, error_messages, attachments): + error_messages.append(f"No such file: {file_wild}") + for sub_dir in sys_path_find_sub_dirs(sub_dirs): + 
attachments.append(f' listdir("{sub_dir}"):') + for node in sorted(os.listdir(sub_dir)): + attachments.append(f" {node}") + + +def _find_so_using_nvidia_lib_dirs(libbasename, so_basename, error_messages, attachments): + if libbasename == "nvvm": # noqa: SIM108 + nvidia_sub_dirs = ("nvidia", "*", "nvvm", "lib64") + else: + nvidia_sub_dirs = ("nvidia", "*", "lib") + file_wild = so_basename + "*" + for lib_dir in sys_path_find_sub_dirs(nvidia_sub_dirs): # First look for an exact match so_name = os.path.join(lib_dir, so_basename) if os.path.isfile(so_name): return so_name # Look for a versioned library # Using sort here mainly to make the result deterministic. - for node in sorted(glob.glob(os.path.join(lib_dir, so_wild))): + for node in sorted(glob.glob(os.path.join(lib_dir, file_wild))): so_name = os.path.join(lib_dir, node) if os.path.isfile(so_name): return so_name - error_messages.append(f"No such file: {so_wild}") - for lib_dir in find_nvidia_lib_dirs(): - attachments.append(f" listdir({repr(lib_dir)}):") - for node in sorted(os.listdir(lib_dir)): - attachments.append(f" {node}") + _no_such_file_in_sub_dirs(nvidia_sub_dirs, file_wild, error_messages, attachments) + return None + + +def _find_dll_using_nvidia_bin_dirs(libbasename, error_messages, attachments): + if libbasename == "nvvm": # noqa: SIM108 + nvidia_sub_dirs = ("nvidia", "*", "nvvm", "bin") + else: + nvidia_sub_dirs = ("nvidia", "*", "bin") + file_wild = libbasename + "*.dll" + for bin_dir in sys_path_find_sub_dirs(nvidia_sub_dirs): + for node in sorted(glob.glob(os.path.join(bin_dir, file_wild))): + dll_name = os.path.join(bin_dir, node) + if os.path.isfile(dll_name): + return dll_name + _no_such_file_in_sub_dirs(nvidia_sub_dirs, file_wild, error_messages, attachments) return None @@ -44,11 +65,11 @@ def _get_cuda_paths_info(key, error_messages): return env_path_tuple.info -def _find_using_cudalib_dir(so_basename, error_messages, attachments): - lib_dir = _get_cuda_paths_info("cudalib_dir", 
error_messages) - if lib_dir is None: +def _find_so_using_cudalib_dir(so_basename, error_messages, attachments): + cudalib_dir = _get_cuda_paths_info("cudalib_dir", error_messages) + if cudalib_dir is None: return None - primary_so_dir = lib_dir + "/" + primary_so_dir = cudalib_dir + "/" candidate_so_dirs = [primary_so_dir] libs = ["/lib/", "/lib64/"] for _ in range(2): @@ -63,7 +84,7 @@ def _find_using_cudalib_dir(so_basename, error_messages, attachments): return so_name error_messages.append(f"No such file: {so_name}") for so_dirname in candidate_so_dirs: - attachments.append(f" listdir({repr(so_dirname)}):") + attachments.append(f' listdir("{so_dirname}"):') if not os.path.isdir(so_dirname): attachments.append(" DIRECTORY DOES NOT EXIST") else: @@ -72,47 +93,46 @@ def _find_using_cudalib_dir(so_basename, error_messages, attachments): return None -def _inspect_environment(libbasename, handle): - if IS_WIN32: - import win32api - - dll_path = win32api.GetModuleFileName(handle) - print(f"LOOOK {libbasename=} Loaded DLL path:", dll_path) - error_messages = [] - lib_dir = _get_cuda_paths_info("cudalib_dir", error_messages) - if lib_dir is None: - print(f"LOOOK {libbasename=} {error_messages=}") - elif not os.path.isdir(lib_dir): - print(f"LOOOK {libbasename=} not isdir({lib_dir=})") - else: - print(f"LOOOK {libbasename=} cudalib_dir {lib_dir=}") - for node in sorted(os.listdir(lib_dir)): - print(f"LOOOK {node}") - for lib_dir in find_nvidia_lib_dirs(): - print(f"LOOOK {libbasename=} NVIDIA {lib_dir=}") - for node in sorted(os.listdir(lib_dir)): - print(f"LOOOK {node}") +def _find_dll_using_cudalib_dir(libbasename, error_messages, attachments): + cudalib_dir = _get_cuda_paths_info("cudalib_dir", error_messages) + if cudalib_dir is None: + return None + file_wild = libbasename + "*.dll" + for node in sorted(glob.glob(os.path.join(cudalib_dir, file_wild))): + dll_name = os.path.join(cudalib_dir, node) + if os.path.isfile(dll_name): + return dll_name + 
error_messages.append(f"No such file: {file_wild}") + attachments.append(f' listdir("{cudalib_dir}"):') + for node in sorted(os.listdir(cudalib_dir)): + attachments.append(f" {node}") + return None @functools.cache -def find_nvidia_dynamic_library(libbasename, handle=None): - if handle is not None: - try: - _inspect_environment(libbasename, handle) - except Exception as e: - print("LOOOK EXCEPTION:") - traceback.print_exception(type(e), e, e.__traceback__, file=sys.stdout) - if IS_WIN32: - return - so_basename = f"lib{libbasename}.so" +def find_nvidia_dynamic_library(libbasename): error_messages = [] attachments = [] - so_name = _find_using_nvidia_lib_dirs(so_basename, error_messages, attachments) + + if IS_WIN32: + dll_name = _find_dll_using_nvidia_bin_dirs(libbasename, error_messages, attachments) + if dll_name is None: + if libbasename == "nvvm": + dll_name = _get_cuda_paths_info("nvvm", error_messages) + else: + dll_name = _find_dll_using_cudalib_dir(libbasename, error_messages, attachments) + if dll_name is None: + attachments = "\n".join(attachments) + raise RuntimeError(f"Failure finding {libbasename}*.dll: {', '.join(error_messages)}\n{attachments}") + return dll_name + + so_basename = f"lib{libbasename}.so" + so_name = _find_so_using_nvidia_lib_dirs(libbasename, so_basename, error_messages, attachments) if so_name is None: if libbasename == "nvvm": so_name = _get_cuda_paths_info("nvvm", error_messages) else: - so_name = _find_using_cudalib_dir(so_basename, error_messages, attachments) + so_name = _find_so_using_cudalib_dir(so_basename, error_messages, attachments) if so_name is None: attachments = "\n".join(attachments) raise RuntimeError(f"Failure finding {so_basename}: {', '.join(error_messages)}\n{attachments}") diff --git a/cuda_bindings/cuda/bindings/_path_finder_utils/find_nvidia_lib_dirs.py b/cuda_bindings/cuda/bindings/_path_finder_utils/find_nvidia_lib_dirs.py deleted file mode 100644 index 2e1cfc78d..000000000 --- 
a/cuda_bindings/cuda/bindings/_path_finder_utils/find_nvidia_lib_dirs.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright 2024-2025 NVIDIA Corporation. All rights reserved. -# -# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE - -import functools -import os -import sys - - -@functools.cache -def _find_nvidia_lib_dirs(sys_path): - results = [] - for base in sys_path: - nvidia_base = os.path.join(base, "nvidia") - if not os.path.isdir(nvidia_base): - continue - try: - subdirs = os.listdir(nvidia_base) - except OSError: - continue - for sub in subdirs: - sub_path = os.path.join(nvidia_base, sub) - lib_path = os.path.join(sub_path, "lib") - if os.path.isdir(lib_path): - results.append(lib_path) - return results - - -def find_nvidia_lib_dirs(): - return _find_nvidia_lib_dirs(tuple(sys.path)) diff --git a/cuda_bindings/cuda/bindings/_path_finder_utils/sys_path_find_sub_dirs.py b/cuda_bindings/cuda/bindings/_path_finder_utils/sys_path_find_sub_dirs.py new file mode 100644 index 000000000..d2da726c9 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_path_finder_utils/sys_path_find_sub_dirs.py @@ -0,0 +1,40 @@ +# Copyright 2024-2025 NVIDIA Corporation. All rights reserved. 
+# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +import functools +import os +import sys + + +@functools.cache +def _impl(sys_path, sub_dirs): + results = [] + for base in sys_path: + stack = [(base, 0)] # (current_path, index into sub_dirs) + while stack: + current_path, idx = stack.pop() + if idx == len(sub_dirs): + if os.path.isdir(current_path): + results.append(current_path) + continue + + sub = sub_dirs[idx] + if sub == "*": + try: + entries = sorted(os.listdir(current_path)) + except OSError: + continue + for entry in entries: + entry_path = os.path.join(current_path, entry) + if os.path.isdir(entry_path): + stack.append((entry_path, idx + 1)) + else: + next_path = os.path.join(current_path, sub) + if os.path.isdir(next_path): + stack.append((next_path, idx + 1)) + return results + + +def sys_path_find_sub_dirs(sub_dirs): + return _impl(tuple(sys.path), tuple(sub_dirs)) diff --git a/cuda_bindings/tests/path_finder.py b/cuda_bindings/tests/path_finder.py index 7f6b0e8d9..e9245a5be 100644 --- a/cuda_bindings/tests/path_finder.py +++ b/cuda_bindings/tests/path_finder.py @@ -5,5 +5,5 @@ for k, v in paths.items(): print(f"{k}: {v}", flush=True) -print(path_finder.find_nvidia_dynamic_library("nvvm", "TEST")) -print(path_finder.find_nvidia_dynamic_library("nvJitLink", "TEST")) +print(path_finder.find_nvidia_dynamic_library("nvvm")) +print(path_finder.find_nvidia_dynamic_library("nvJitLink")) diff --git a/cuda_bindings/tests/test_sys_path_find_sub_dirs.py b/cuda_bindings/tests/test_sys_path_find_sub_dirs.py new file mode 100644 index 000000000..6bb958b95 --- /dev/null +++ b/cuda_bindings/tests/test_sys_path_find_sub_dirs.py @@ -0,0 +1,72 @@ +import os + +import pytest + +from cuda.bindings._path_finder_utils.sys_path_find_sub_dirs import _impl + + +@pytest.fixture +def test_tree(tmp_path): + # Build: + # tmp_path/ + # sys1/nvidia/foo/lib + # sys1/nvidia/bar/lib + # sys2/nvidia/baz/nvvm/lib64 + base = tmp_path + (base / "sys1" / "nvidia" / "foo" / 
"lib").mkdir(parents=True) + (base / "sys1" / "nvidia" / "bar" / "lib").mkdir(parents=True) + (base / "sys2" / "nvidia" / "baz" / "nvvm" / "lib64").mkdir(parents=True) + + return { + "sys_path": ( + str(base / "sys1"), + str(base / "sys2"), + str(base / "nonexistent"), # should be ignored + ), + "base": base, + } + + +def test_exact_match(test_tree): + sys_path = test_tree["sys_path"] + base = test_tree["base"] + result = _impl(sys_path, ("nvidia", "foo", "lib")) + expected = [str(base / "sys1" / "nvidia" / "foo" / "lib")] + assert result == expected + + +def test_single_wildcard(test_tree): + sys_path = test_tree["sys_path"] + base = test_tree["base"] + result = _impl(sys_path, ("nvidia", "*", "lib")) + expected = [ + str(base / "sys1" / "nvidia" / "bar" / "lib"), + str(base / "sys1" / "nvidia" / "foo" / "lib"), + ] + assert sorted(result) == sorted(expected) + + +def test_double_wildcard(test_tree): + sys_path = test_tree["sys_path"] + base = test_tree["base"] + result = _impl(sys_path, ("nvidia", "*", "nvvm", "lib64")) + expected = [str(base / "sys2" / "nvidia" / "baz" / "nvvm" / "lib64")] + assert result == expected + + +def test_no_match(test_tree): + sys_path = test_tree["sys_path"] + result = _impl(sys_path, ("nvidia", "nonexistent", "lib")) + assert result == [] + + +def test_empty_sys_path(): + result = _impl((), ("nvidia", "*", "lib")) + assert result == [] + + +def test_empty_sub_dirs(test_tree): + sys_path = test_tree["sys_path"] + result = _impl(sys_path, ()) + expected = [p for p in sys_path if os.path.isdir(p)] + assert sorted(result) == sorted(expected) From eb0fe308b462b755ae6307e629e679ed8bacc469 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 31 Mar 2025 23:04:15 -0700 Subject: [PATCH 36/69] Fix silly oversight: forgot to undo experimental change. 
--- cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx | 2 +- cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx index 579393169..e4a77e866 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx @@ -90,7 +90,7 @@ cdef load_library(const int driver_ver): else: raise RuntimeError('Failed to load nvJitLink') - path_finder.find_nvidia_dynamic_library("nvJitLink", handle) + path_finder.find_nvidia_dynamic_library("nvJitLink") assert handle != 0 return handle diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx index dc6e37610..53165771b 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx @@ -88,7 +88,7 @@ cdef load_library(const int driver_ver): else: raise RuntimeError('Failed to load nvvm') - path_finder.find_nvidia_dynamic_library("nvvm", handle) + path_finder.find_nvidia_dynamic_library("nvvm") assert handle != 0 return handle From 8fd33bf1a69d8e4ece3540bd955804c4b0a850c3 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 31 Mar 2025 23:07:24 -0700 Subject: [PATCH 37/69] Also reduce test test-linux matrix. --- .github/workflows/build-and-test.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 38b8408fe..b33bbcf65 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -206,13 +206,8 @@ jobs: matrix: host-platform: - linux-64 - - linux-aarch64 python-version: - - "3.13" - "3.12" - - "3.11" - - "3.10" - - "3.9" cuda-version: # Note: this is for test-time only. 
- "12.8.0" From 667d3ed06f1ef83c4a3580064ab78812f4e47807 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Tue, 1 Apr 2025 20:44:04 -0700 Subject: [PATCH 38/69] Reimplement load_library() functions in nvjitlink_windows.pyx, nvvm_windows.pyx to actively use path_finder.find_nvidia_dynamic_library() --- .../bindings/_internal/nvjitlink_windows.pyx | 63 +++++-------------- .../cuda/bindings/_internal/nvvm_windows.pyx | 63 +++++-------------- 2 files changed, 32 insertions(+), 94 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx index e4a77e866..9f88fc6fb 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx @@ -6,8 +6,6 @@ from libc.stdint cimport intptr_t -from .utils cimport get_nvjitlink_dso_version_suffix - from .utils import FunctionNotFoundError, NotSupportedError from cuda.bindings import path_finder @@ -16,6 +14,7 @@ import os import site import win32api +import pywintypes ############################################################################### @@ -44,55 +43,25 @@ cdef void* __nvJitLinkGetInfoLog = NULL cdef void* __nvJitLinkVersion = NULL -cdef inline list get_site_packages(): - return [site.getusersitepackages()] + site.getsitepackages() - - cdef load_library(const int driver_ver): - handle = 0 - - for suffix in get_nvjitlink_dso_version_suffix(driver_ver): - if len(suffix) == 0: - continue - dll_name = f"nvJitLink_{suffix}0_0.dll" - - # First check if the DLL has been loaded by 3rd parties - try: - handle = win32api.GetModuleHandle(dll_name) - except: - pass - else: - break - - # Next, check if DLLs are installed via pip - for sp in get_site_packages(): - mod_path = os.path.join(sp, "nvidia", "nvJitLink", "bin") - if not os.path.isdir(mod_path): - continue - os.add_dll_directory(mod_path) - try: - handle = win32api.LoadLibraryEx( - # Note: 
LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path... - os.path.join(mod_path, dll_name), - 0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR) - except: - pass - else: - break - - # Finally, try default search - try: - handle = win32api.LoadLibrary(dll_name) - except: - pass - else: - break + cdef str dll_path = path_finder.find_nvidia_dynamic_library("nvJitLink") + cdef str dll_name = os.path.basename(dll_path) + cdef intptr_t handle = 0 + + # Check if already loaded + try: + handle = win32api.GetModuleHandle(dll_name) + except pywintypes.error: + pass else: - raise RuntimeError('Failed to load nvJitLink') + return handle - path_finder.find_nvidia_dynamic_library("nvJitLink") + # Not already loaded; load it + try: + handle = win32api.LoadLibrary(dll_path) + except pywintypes.error as e: + raise RuntimeError(f"Failed to load NVVM DLL at {dll_path}: {e}") - assert handle != 0 return handle diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx index 53165771b..f8714c961 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx @@ -6,8 +6,6 @@ from libc.stdint cimport intptr_t -from .utils cimport get_nvvm_dso_version_suffix - from .utils import FunctionNotFoundError, NotSupportedError from cuda.bindings import path_finder @@ -16,6 +14,7 @@ import os import site import win32api +import pywintypes ############################################################################### @@ -42,55 +41,25 @@ cdef void* __nvvmGetProgramLogSize = NULL cdef void* __nvvmGetProgramLog = NULL -cdef inline list get_site_packages(): - return [site.getusersitepackages()] + site.getsitepackages() - - cdef load_library(const int driver_ver): - handle = 0 - - for suffix in get_nvvm_dso_version_suffix(driver_ver): - if len(suffix) == 0: - continue - dll_name = "nvvm64_40_0" - - # First check if the DLL has been loaded by 3rd parties - 
try: - handle = win32api.GetModuleHandle(dll_name) - except: - pass - else: - break - - # Next, check if DLLs are installed via pip - for sp in get_site_packages(): - mod_path = os.path.join(sp, "nvidia", "cuda_nvcc", "nvvm", "bin") - if not os.path.isdir(mod_path): - continue - os.add_dll_directory(mod_path) - try: - handle = win32api.LoadLibraryEx( - # Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path... - os.path.join(mod_path, dll_name), - 0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR) - except: - pass - else: - break - - # Finally, try default search - try: - handle = win32api.LoadLibrary(dll_name) - except: - pass - else: - break + cdef str dll_path = path_finder.find_nvidia_dynamic_library("nvvm") + cdef str dll_name = os.path.basename(dll_path) + cdef intptr_t handle = 0 + + # Check if already loaded + try: + handle = win32api.GetModuleHandle(dll_name) + except pywintypes.error: + pass else: - raise RuntimeError('Failed to load nvvm') + return handle - path_finder.find_nvidia_dynamic_library("nvvm") + # Not already loaded; load it + try: + handle = win32api.LoadLibrary(dll_path) + except pywintypes.error as e: + raise RuntimeError(f"Failed to load NVVM DLL at {dll_path}: {e}") - assert handle != 0 return handle From e4a48499e1ea88f2c2b17b2458f5d3581b1cb7ba Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Wed, 2 Apr 2025 07:50:46 -0700 Subject: [PATCH 39/69] Factor out load_nvidia_dynamic_library() from _internal/nvjitlink_linux.pyx, nvvm_linux.pyx --- .../cuda/bindings/_internal/nvjitlink_linux.pyx | 12 ++++-------- .../cuda/bindings/_internal/nvvm_linux.pyx | 10 +++------- .../load_nvidia_dynamic_library.py | 13 +++++++++++++ cuda_bindings/cuda/bindings/path_finder.py | 2 ++ 4 files changed, 22 insertions(+), 15 deletions(-) create mode 100644 cuda_bindings/cuda/bindings/_path_finder_utils/load_nvidia_dynamic_library.py diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx index 6e9de8ee5..2f98dde0a 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx @@ -6,7 +6,7 @@ import os -from libc.stdint cimport intptr_t +from libc.stdint cimport intptr_t, uintptr_t from .utils import FunctionNotFoundError, NotSupportedError @@ -54,13 +54,9 @@ cdef void* __nvJitLinkGetInfoLog = NULL cdef void* __nvJitLinkVersion = NULL -cdef void* load_library(const int driver_ver) except* with gil: - so_name = path_finder.find_nvidia_dynamic_library("nvJitLink") - cdef void* handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL) - if handle != NULL: - return handle - err_msg = dlerror().decode(errors="backslashreplace") - raise RuntimeError(f"Failed to dlopen {so_name}: {err_msg}") +cdef void* load_library(int driver_ver) except* with gil: + cdef uintptr_t handle = path_finder.load_nvidia_dynamic_library("nvJitLink") + return handle cdef int _check_or_init_nvjitlink() except -1 nogil: diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx index 4d3135321..fcf8686e4 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx @@ -4,7 +4,7 @@ # # This code was automatically 
generated across versions from 11.0.3 to 12.8.0. Do not modify it directly. -from libc.stdint cimport intptr_t +from libc.stdint cimport intptr_t, uintptr_t from .utils import FunctionNotFoundError, NotSupportedError @@ -51,12 +51,8 @@ cdef void* __nvvmGetProgramLog = NULL cdef void* load_library(const int driver_ver) except* with gil: - so_name = path_finder.find_nvidia_dynamic_library("nvvm") - cdef void* handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL) - if handle != NULL: - return handle - err_msg = dlerror().decode(errors="backslashreplace") - raise RuntimeError(f"Failed to dlopen {so_name}: {err_msg}") + cdef uintptr_t handle = path_finder.load_nvidia_dynamic_library("nvvm") + return handle cdef int _check_or_init_nvvm() except -1 nogil: diff --git a/cuda_bindings/cuda/bindings/_path_finder_utils/load_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder_utils/load_nvidia_dynamic_library.py new file mode 100644 index 000000000..cc4ac73f9 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_path_finder_utils/load_nvidia_dynamic_library.py @@ -0,0 +1,13 @@ +import ctypes +import os + +from .find_nvidia_dynamic_library import find_nvidia_dynamic_library + + +def load_nvidia_dynamic_library(name: str) -> int: + path = find_nvidia_dynamic_library(name) + try: + handle = ctypes.CDLL(path, mode=os.RTLD_NOW | os.RTLD_GLOBAL) + return handle._handle # This is the actual `void*` value as an int + except OSError as e: + raise RuntimeError(f"Failed to dlopen {path}: {e}") from e diff --git a/cuda_bindings/cuda/bindings/path_finder.py b/cuda_bindings/cuda/bindings/path_finder.py index fa45ec0d1..2bb55cb3c 100644 --- a/cuda_bindings/cuda/bindings/path_finder.py +++ b/cuda_bindings/cuda/bindings/path_finder.py @@ -17,9 +17,11 @@ get_system_ctk, ) from cuda.bindings._path_finder_utils.find_nvidia_dynamic_library import find_nvidia_dynamic_library +from cuda.bindings._path_finder_utils.load_nvidia_dynamic_library import load_nvidia_dynamic_library __all__ 
= [ "find_nvidia_dynamic_library", + "load_nvidia_dynamic_library", "get_conda_ctk", "get_conda_include_dir", "get_cuda_home", From a140f5bc3ad414cf91d279e479d80b88c2585781 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Wed, 2 Apr 2025 08:13:32 -0700 Subject: [PATCH 40/69] Generalize load_nvidia_dynamic_library.py to also work under Windows. --- .../bindings/_internal/nvjitlink_windows.pyx | 24 +++---------------- .../cuda/bindings/_internal/nvvm_windows.pyx | 24 +++---------------- .../load_nvidia_dynamic_library.py | 24 ++++++++++++++----- 3 files changed, 24 insertions(+), 48 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx index 9f88fc6fb..e9d55566f 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx @@ -4,7 +4,7 @@ # # This code was automatically generated across versions from 12.0.1 to 12.6.2. Do not modify it directly. 
-from libc.stdint cimport intptr_t +from libc.stdint cimport intptr_t, uintptr_t from .utils import FunctionNotFoundError, NotSupportedError @@ -14,7 +14,6 @@ import os import site import win32api -import pywintypes ############################################################################### @@ -44,25 +43,8 @@ cdef void* __nvJitLinkVersion = NULL cdef load_library(const int driver_ver): - cdef str dll_path = path_finder.find_nvidia_dynamic_library("nvJitLink") - cdef str dll_name = os.path.basename(dll_path) - cdef intptr_t handle = 0 - - # Check if already loaded - try: - handle = win32api.GetModuleHandle(dll_name) - except pywintypes.error: - pass - else: - return handle - - # Not already loaded; load it - try: - handle = win32api.LoadLibrary(dll_path) - except pywintypes.error as e: - raise RuntimeError(f"Failed to load NVVM DLL at {dll_path}: {e}") - - return handle + cdef uintptr_t handle = path_finder.load_nvidia_dynamic_library("nvJitLink") + return handle cdef int _check_or_init_nvjitlink() except -1 nogil: diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx index f8714c961..257c24ae8 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx @@ -4,7 +4,7 @@ # # This code was automatically generated across versions from 11.0.3 to 12.8.0. Do not modify it directly. 
-from libc.stdint cimport intptr_t +from libc.stdint cimport intptr_t, uintptr_t from .utils import FunctionNotFoundError, NotSupportedError @@ -14,7 +14,6 @@ import os import site import win32api -import pywintypes ############################################################################### @@ -42,25 +41,8 @@ cdef void* __nvvmGetProgramLog = NULL cdef load_library(const int driver_ver): - cdef str dll_path = path_finder.find_nvidia_dynamic_library("nvvm") - cdef str dll_name = os.path.basename(dll_path) - cdef intptr_t handle = 0 - - # Check if already loaded - try: - handle = win32api.GetModuleHandle(dll_name) - except pywintypes.error: - pass - else: - return handle - - # Not already loaded; load it - try: - handle = win32api.LoadLibrary(dll_path) - except pywintypes.error as e: - raise RuntimeError(f"Failed to load NVVM DLL at {dll_path}: {e}") - - return handle + cdef uintptr_t handle = path_finder.load_nvidia_dynamic_library("nvvm") + return handle cdef int _check_or_init_nvvm() except -1 nogil: diff --git a/cuda_bindings/cuda/bindings/_path_finder_utils/load_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder_utils/load_nvidia_dynamic_library.py index cc4ac73f9..2110e0faf 100644 --- a/cuda_bindings/cuda/bindings/_path_finder_utils/load_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder_utils/load_nvidia_dynamic_library.py @@ -1,13 +1,25 @@ import ctypes +import functools import os +import sys from .find_nvidia_dynamic_library import find_nvidia_dynamic_library +@functools.cache def load_nvidia_dynamic_library(name: str) -> int: - path = find_nvidia_dynamic_library(name) - try: - handle = ctypes.CDLL(path, mode=os.RTLD_NOW | os.RTLD_GLOBAL) - return handle._handle # This is the actual `void*` value as an int - except OSError as e: - raise RuntimeError(f"Failed to dlopen {path}: {e}") from e + dl_path = find_nvidia_dynamic_library(name) + if sys.platform == "win32": + try: + handle = 
ctypes.windll.kernel32.LoadLibraryW(dl_path) + if not handle: + raise ctypes.WinError(ctypes.get_last_error()) + except Exception as e: + raise RuntimeError(f"Failed to load DLL at {dl_path}: {e}") from e + return handle + else: + try: + handle = ctypes.CDLL(dl_path, mode=os.RTLD_NOW | os.RTLD_GLOBAL) + return handle._handle # Raw void* as int + except OSError as e: + raise RuntimeError(f"Failed to dlopen {dl_path}: {e}") from e From da417da649ab77149c30f921c86532f4704e0596 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Wed, 2 Apr 2025 08:28:40 -0700 Subject: [PATCH 41/69] Add `void*` return type to load_library() implementations in _internal/nvjitlink_windows.pyx, nvvm_windows.pyx --- cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx | 2 +- cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx index e9d55566f..980b3404f 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx @@ -42,7 +42,7 @@ cdef void* __nvJitLinkGetInfoLog = NULL cdef void* __nvJitLinkVersion = NULL -cdef load_library(const int driver_ver): +cdef void* load_library(int driver_ver) except* with gil: cdef uintptr_t handle = path_finder.load_nvidia_dynamic_library("nvJitLink") return handle diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx index 257c24ae8..4f1715c03 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx @@ -40,7 +40,7 @@ cdef void* __nvvmGetProgramLogSize = NULL cdef void* __nvvmGetProgramLog = NULL -cdef load_library(const int driver_ver): +cdef void* load_library(int driver_ver) except* with gil: cdef uintptr_t handle = 
path_finder.load_nvidia_dynamic_library("nvvm") return handle From feed101319b556d1865a228f344734fae8b09c01 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Wed, 2 Apr 2025 08:52:11 -0700 Subject: [PATCH 42/69] Resolve cython error: object handle vs `void*` handle ``` Error compiling Cython file: ------------------------------------------------------------ ... err = (__cuDriverGetVersion)(&driver_ver) if err != 0: raise RuntimeError('something went wrong') # Load library handle = load_library(driver_ver) ^ ------------------------------------------------------------ cuda\bindings\_internal\nvjitlink.pyx:72:29: Cannot convert 'void *' to Python object ``` --- cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx | 5 +++-- cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx index 980b3404f..2f67ee173 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx @@ -53,15 +53,16 @@ cdef int _check_or_init_nvjitlink() except -1 nogil: return 0 cdef int err, driver_ver + cdef void* handle with gil: # Load driver to check version try: - handle = win32api.LoadLibraryEx("nvcuda.dll", 0, LOAD_LIBRARY_SEARCH_SYSTEM32) + nvcuda_handle = win32api.LoadLibraryEx("nvcuda.dll", 0, LOAD_LIBRARY_SEARCH_SYSTEM32) except Exception as e: raise NotSupportedError(f'CUDA driver is not found ({e})') global __cuDriverGetVersion if __cuDriverGetVersion == NULL: - __cuDriverGetVersion = win32api.GetProcAddress(handle, 'cuDriverGetVersion') + __cuDriverGetVersion = win32api.GetProcAddress(nvcuda_handle, 'cuDriverGetVersion') if __cuDriverGetVersion == NULL: raise RuntimeError('something went wrong') err = (__cuDriverGetVersion)(&driver_ver) diff --git 
a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx index 4f1715c03..3233046e1 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx @@ -51,15 +51,16 @@ cdef int _check_or_init_nvvm() except -1 nogil: return 0 cdef int err, driver_ver + cdef void* handle with gil: # Load driver to check version try: - handle = win32api.LoadLibraryEx("nvcuda.dll", 0, LOAD_LIBRARY_SEARCH_SYSTEM32) + nvcuda_handle = win32api.LoadLibraryEx("nvcuda.dll", 0, LOAD_LIBRARY_SEARCH_SYSTEM32) except Exception as e: raise NotSupportedError(f'CUDA driver is not found ({e})') global __cuDriverGetVersion if __cuDriverGetVersion == NULL: - __cuDriverGetVersion = win32api.GetProcAddress(handle, 'cuDriverGetVersion') + __cuDriverGetVersion = win32api.GetProcAddress(nvcuda_handle, 'cuDriverGetVersion') if __cuDriverGetVersion == NULL: raise RuntimeError('something went wrong') err = (__cuDriverGetVersion)(&driver_ver) From 870f2e9d4c8e66758ee3c311dab4450a6b3c390b Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Wed, 2 Apr 2025 09:24:48 -0700 Subject: [PATCH 43/69] Resolve another cython error: `void*` handle vs `intptr_t` handle ``` Error compiling Cython file: ------------------------------------------------------------ ... 
handle = load_library(driver_ver) # Load function global __nvJitLinkCreate try: __nvJitLinkCreate = win32api.GetProcAddress(handle, 'nvJitLinkCreate') ^ ------------------------------------------------------------ cuda\bindings\_internal\nvjitlink.pyx:78:73: Cannot convert 'void *' to Python object ``` --- cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx | 4 ++-- cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx index 2f67ee173..62e701324 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx @@ -53,7 +53,7 @@ cdef int _check_or_init_nvjitlink() except -1 nogil: return 0 cdef int err, driver_ver - cdef void* handle + cdef intptr_t handle with gil: # Load driver to check version try: @@ -70,7 +70,7 @@ cdef int _check_or_init_nvjitlink() except -1 nogil: raise RuntimeError('something went wrong') # Load library - handle = load_library(driver_ver) + handle = load_library(driver_ver) # Load function global __nvJitLinkCreate diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx index 3233046e1..f92addc53 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx @@ -51,7 +51,7 @@ cdef int _check_or_init_nvvm() except -1 nogil: return 0 cdef int err, driver_ver - cdef void* handle + cdef intptr_t handle with gil: # Load driver to check version try: @@ -68,7 +68,7 @@ cdef int _check_or_init_nvvm() except -1 nogil: raise RuntimeError('something went wrong') # Load library - handle = load_library(driver_ver) + handle = load_library(driver_ver) # Load function global __nvvmVersion From f14d76bb1f542891919ec8f64563ea5e339cd3a7 Mon Sep 17 00:00:00 
2001 From: "Ralf W. Grosse-Kunstleve" Date: Wed, 2 Apr 2025 10:31:05 -0700 Subject: [PATCH 44/69] Resolve signed/unsigned runtime error. Use uintptr_t consistently. https://github.com/NVIDIA/cuda-python/actions/runs/14224673173/job/39861750852?pr=447#logs ``` =================================== ERRORS ==================================== _____________________ ERROR collecting test_nvjitlink.py ______________________ tests\test_nvjitlink.py:62: in not check_nvjitlink_usable(), reason="nvJitLink not usable, maybe not installed or too old (<12.3)" tests\test_nvjitlink.py:58: in check_nvjitlink_usable return inner_nvjitlink._inspect_function_pointer("__nvJitLinkVersion") != 0 cuda\\bindings\\_internal\\nvjitlink.pyx:221: in cuda.bindings._internal.nvjitlink._inspect_function_pointer ??? cuda\\bindings\\_internal\\nvjitlink.pyx:224: in cuda.bindings._internal.nvjitlink._inspect_function_pointer ??? cuda\\bindings\\_internal\\nvjitlink.pyx:172: in cuda.bindings._internal.nvjitlink._inspect_function_pointers ??? cuda\\bindings\\_internal\\nvjitlink.pyx:73: in cuda.bindings._internal.nvjitlink._check_or_init_nvjitlink ??? cuda\\bindings\\_internal\\nvjitlink.pyx:46: in cuda.bindings._internal.nvjitlink.load_library ??? E OverflowError: can't convert negative value to size_t ``` --- .../bindings/_internal/nvjitlink_windows.pyx | 66 +++++++++---------- .../cuda/bindings/_internal/nvvm_windows.pyx | 58 ++++++++-------- .../load_nvidia_dynamic_library.py | 4 +- 3 files changed, 62 insertions(+), 66 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx index 62e701324..04e08056b 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx @@ -4,15 +4,13 @@ # # This code was automatically generated across versions from 12.0.1 to 12.6.2. Do not modify it directly. 
-from libc.stdint cimport intptr_t, uintptr_t +from libc.stdint cimport uintptr_t from .utils import FunctionNotFoundError, NotSupportedError from cuda.bindings import path_finder import os -import site - import win32api @@ -53,7 +51,7 @@ cdef int _check_or_init_nvjitlink() except -1 nogil: return 0 cdef int err, driver_ver - cdef intptr_t handle + cdef uintptr_t handle with gil: # Load driver to check version try: @@ -62,7 +60,7 @@ cdef int _check_or_init_nvjitlink() except -1 nogil: raise NotSupportedError(f'CUDA driver is not found ({e})') global __cuDriverGetVersion if __cuDriverGetVersion == NULL: - __cuDriverGetVersion = win32api.GetProcAddress(nvcuda_handle, 'cuDriverGetVersion') + __cuDriverGetVersion = win32api.GetProcAddress(nvcuda_handle, 'cuDriverGetVersion') if __cuDriverGetVersion == NULL: raise RuntimeError('something went wrong') err = (__cuDriverGetVersion)(&driver_ver) @@ -70,90 +68,90 @@ cdef int _check_or_init_nvjitlink() except -1 nogil: raise RuntimeError('something went wrong') # Load library - handle = load_library(driver_ver) + handle = load_library(driver_ver) # Load function global __nvJitLinkCreate try: - __nvJitLinkCreate = win32api.GetProcAddress(handle, 'nvJitLinkCreate') + __nvJitLinkCreate = win32api.GetProcAddress(handle, 'nvJitLinkCreate') except: pass global __nvJitLinkDestroy try: - __nvJitLinkDestroy = win32api.GetProcAddress(handle, 'nvJitLinkDestroy') + __nvJitLinkDestroy = win32api.GetProcAddress(handle, 'nvJitLinkDestroy') except: pass global __nvJitLinkAddData try: - __nvJitLinkAddData = win32api.GetProcAddress(handle, 'nvJitLinkAddData') + __nvJitLinkAddData = win32api.GetProcAddress(handle, 'nvJitLinkAddData') except: pass global __nvJitLinkAddFile try: - __nvJitLinkAddFile = win32api.GetProcAddress(handle, 'nvJitLinkAddFile') + __nvJitLinkAddFile = win32api.GetProcAddress(handle, 'nvJitLinkAddFile') except: pass global __nvJitLinkComplete try: - __nvJitLinkComplete = win32api.GetProcAddress(handle, 'nvJitLinkComplete') 
+ __nvJitLinkComplete = win32api.GetProcAddress(handle, 'nvJitLinkComplete') except: pass global __nvJitLinkGetLinkedCubinSize try: - __nvJitLinkGetLinkedCubinSize = win32api.GetProcAddress(handle, 'nvJitLinkGetLinkedCubinSize') + __nvJitLinkGetLinkedCubinSize = win32api.GetProcAddress(handle, 'nvJitLinkGetLinkedCubinSize') except: pass global __nvJitLinkGetLinkedCubin try: - __nvJitLinkGetLinkedCubin = win32api.GetProcAddress(handle, 'nvJitLinkGetLinkedCubin') + __nvJitLinkGetLinkedCubin = win32api.GetProcAddress(handle, 'nvJitLinkGetLinkedCubin') except: pass global __nvJitLinkGetLinkedPtxSize try: - __nvJitLinkGetLinkedPtxSize = win32api.GetProcAddress(handle, 'nvJitLinkGetLinkedPtxSize') + __nvJitLinkGetLinkedPtxSize = win32api.GetProcAddress(handle, 'nvJitLinkGetLinkedPtxSize') except: pass global __nvJitLinkGetLinkedPtx try: - __nvJitLinkGetLinkedPtx = win32api.GetProcAddress(handle, 'nvJitLinkGetLinkedPtx') + __nvJitLinkGetLinkedPtx = win32api.GetProcAddress(handle, 'nvJitLinkGetLinkedPtx') except: pass global __nvJitLinkGetErrorLogSize try: - __nvJitLinkGetErrorLogSize = win32api.GetProcAddress(handle, 'nvJitLinkGetErrorLogSize') + __nvJitLinkGetErrorLogSize = win32api.GetProcAddress(handle, 'nvJitLinkGetErrorLogSize') except: pass global __nvJitLinkGetErrorLog try: - __nvJitLinkGetErrorLog = win32api.GetProcAddress(handle, 'nvJitLinkGetErrorLog') + __nvJitLinkGetErrorLog = win32api.GetProcAddress(handle, 'nvJitLinkGetErrorLog') except: pass global __nvJitLinkGetInfoLogSize try: - __nvJitLinkGetInfoLogSize = win32api.GetProcAddress(handle, 'nvJitLinkGetInfoLogSize') + __nvJitLinkGetInfoLogSize = win32api.GetProcAddress(handle, 'nvJitLinkGetInfoLogSize') except: pass global __nvJitLinkGetInfoLog try: - __nvJitLinkGetInfoLog = win32api.GetProcAddress(handle, 'nvJitLinkGetInfoLog') + __nvJitLinkGetInfoLog = win32api.GetProcAddress(handle, 'nvJitLinkGetInfoLog') except: pass global __nvJitLinkVersion try: - __nvJitLinkVersion = win32api.GetProcAddress(handle, 
'nvJitLinkVersion') + __nvJitLinkVersion = win32api.GetProcAddress(handle, 'nvJitLinkVersion') except: pass @@ -173,46 +171,46 @@ cpdef dict _inspect_function_pointers(): cdef dict data = {} global __nvJitLinkCreate - data["__nvJitLinkCreate"] = __nvJitLinkCreate + data["__nvJitLinkCreate"] = __nvJitLinkCreate global __nvJitLinkDestroy - data["__nvJitLinkDestroy"] = __nvJitLinkDestroy + data["__nvJitLinkDestroy"] = __nvJitLinkDestroy global __nvJitLinkAddData - data["__nvJitLinkAddData"] = __nvJitLinkAddData + data["__nvJitLinkAddData"] = __nvJitLinkAddData global __nvJitLinkAddFile - data["__nvJitLinkAddFile"] = __nvJitLinkAddFile + data["__nvJitLinkAddFile"] = __nvJitLinkAddFile global __nvJitLinkComplete - data["__nvJitLinkComplete"] = __nvJitLinkComplete + data["__nvJitLinkComplete"] = __nvJitLinkComplete global __nvJitLinkGetLinkedCubinSize - data["__nvJitLinkGetLinkedCubinSize"] = __nvJitLinkGetLinkedCubinSize + data["__nvJitLinkGetLinkedCubinSize"] = __nvJitLinkGetLinkedCubinSize global __nvJitLinkGetLinkedCubin - data["__nvJitLinkGetLinkedCubin"] = __nvJitLinkGetLinkedCubin + data["__nvJitLinkGetLinkedCubin"] = __nvJitLinkGetLinkedCubin global __nvJitLinkGetLinkedPtxSize - data["__nvJitLinkGetLinkedPtxSize"] = __nvJitLinkGetLinkedPtxSize + data["__nvJitLinkGetLinkedPtxSize"] = __nvJitLinkGetLinkedPtxSize global __nvJitLinkGetLinkedPtx - data["__nvJitLinkGetLinkedPtx"] = __nvJitLinkGetLinkedPtx + data["__nvJitLinkGetLinkedPtx"] = __nvJitLinkGetLinkedPtx global __nvJitLinkGetErrorLogSize - data["__nvJitLinkGetErrorLogSize"] = __nvJitLinkGetErrorLogSize + data["__nvJitLinkGetErrorLogSize"] = __nvJitLinkGetErrorLogSize global __nvJitLinkGetErrorLog - data["__nvJitLinkGetErrorLog"] = __nvJitLinkGetErrorLog + data["__nvJitLinkGetErrorLog"] = __nvJitLinkGetErrorLog global __nvJitLinkGetInfoLogSize - data["__nvJitLinkGetInfoLogSize"] = __nvJitLinkGetInfoLogSize + data["__nvJitLinkGetInfoLogSize"] = __nvJitLinkGetInfoLogSize global __nvJitLinkGetInfoLog - 
data["__nvJitLinkGetInfoLog"] = __nvJitLinkGetInfoLog + data["__nvJitLinkGetInfoLog"] = __nvJitLinkGetInfoLog global __nvJitLinkVersion - data["__nvJitLinkVersion"] = __nvJitLinkVersion + data["__nvJitLinkVersion"] = __nvJitLinkVersion func_ptrs = data return data diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx index f92addc53..a858977da 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx @@ -4,15 +4,13 @@ # # This code was automatically generated across versions from 11.0.3 to 12.8.0. Do not modify it directly. -from libc.stdint cimport intptr_t, uintptr_t +from libc.stdint cimport uintptr_t from .utils import FunctionNotFoundError, NotSupportedError from cuda.bindings import path_finder import os -import site - import win32api @@ -51,7 +49,7 @@ cdef int _check_or_init_nvvm() except -1 nogil: return 0 cdef int err, driver_ver - cdef intptr_t handle + cdef uintptr_t handle with gil: # Load driver to check version try: @@ -60,7 +58,7 @@ cdef int _check_or_init_nvvm() except -1 nogil: raise NotSupportedError(f'CUDA driver is not found ({e})') global __cuDriverGetVersion if __cuDriverGetVersion == NULL: - __cuDriverGetVersion = win32api.GetProcAddress(nvcuda_handle, 'cuDriverGetVersion') + __cuDriverGetVersion = win32api.GetProcAddress(nvcuda_handle, 'cuDriverGetVersion') if __cuDriverGetVersion == NULL: raise RuntimeError('something went wrong') err = (__cuDriverGetVersion)(&driver_ver) @@ -68,78 +66,78 @@ cdef int _check_or_init_nvvm() except -1 nogil: raise RuntimeError('something went wrong') # Load library - handle = load_library(driver_ver) + handle = load_library(driver_ver) # Load function global __nvvmVersion try: - __nvvmVersion = win32api.GetProcAddress(handle, 'nvvmVersion') + __nvvmVersion = win32api.GetProcAddress(handle, 'nvvmVersion') except: pass global __nvvmIRVersion try: - __nvvmIRVersion = 
win32api.GetProcAddress(handle, 'nvvmIRVersion') + __nvvmIRVersion = win32api.GetProcAddress(handle, 'nvvmIRVersion') except: pass global __nvvmCreateProgram try: - __nvvmCreateProgram = win32api.GetProcAddress(handle, 'nvvmCreateProgram') + __nvvmCreateProgram = win32api.GetProcAddress(handle, 'nvvmCreateProgram') except: pass global __nvvmDestroyProgram try: - __nvvmDestroyProgram = win32api.GetProcAddress(handle, 'nvvmDestroyProgram') + __nvvmDestroyProgram = win32api.GetProcAddress(handle, 'nvvmDestroyProgram') except: pass global __nvvmAddModuleToProgram try: - __nvvmAddModuleToProgram = win32api.GetProcAddress(handle, 'nvvmAddModuleToProgram') + __nvvmAddModuleToProgram = win32api.GetProcAddress(handle, 'nvvmAddModuleToProgram') except: pass global __nvvmLazyAddModuleToProgram try: - __nvvmLazyAddModuleToProgram = win32api.GetProcAddress(handle, 'nvvmLazyAddModuleToProgram') + __nvvmLazyAddModuleToProgram = win32api.GetProcAddress(handle, 'nvvmLazyAddModuleToProgram') except: pass global __nvvmCompileProgram try: - __nvvmCompileProgram = win32api.GetProcAddress(handle, 'nvvmCompileProgram') + __nvvmCompileProgram = win32api.GetProcAddress(handle, 'nvvmCompileProgram') except: pass global __nvvmVerifyProgram try: - __nvvmVerifyProgram = win32api.GetProcAddress(handle, 'nvvmVerifyProgram') + __nvvmVerifyProgram = win32api.GetProcAddress(handle, 'nvvmVerifyProgram') except: pass global __nvvmGetCompiledResultSize try: - __nvvmGetCompiledResultSize = win32api.GetProcAddress(handle, 'nvvmGetCompiledResultSize') + __nvvmGetCompiledResultSize = win32api.GetProcAddress(handle, 'nvvmGetCompiledResultSize') except: pass global __nvvmGetCompiledResult try: - __nvvmGetCompiledResult = win32api.GetProcAddress(handle, 'nvvmGetCompiledResult') + __nvvmGetCompiledResult = win32api.GetProcAddress(handle, 'nvvmGetCompiledResult') except: pass global __nvvmGetProgramLogSize try: - __nvvmGetProgramLogSize = win32api.GetProcAddress(handle, 'nvvmGetProgramLogSize') + 
__nvvmGetProgramLogSize = win32api.GetProcAddress(handle, 'nvvmGetProgramLogSize') except: pass global __nvvmGetProgramLog try: - __nvvmGetProgramLog = win32api.GetProcAddress(handle, 'nvvmGetProgramLog') + __nvvmGetProgramLog = win32api.GetProcAddress(handle, 'nvvmGetProgramLog') except: pass @@ -159,40 +157,40 @@ cpdef dict _inspect_function_pointers(): cdef dict data = {} global __nvvmVersion - data["__nvvmVersion"] = __nvvmVersion + data["__nvvmVersion"] = __nvvmVersion global __nvvmIRVersion - data["__nvvmIRVersion"] = __nvvmIRVersion + data["__nvvmIRVersion"] = __nvvmIRVersion global __nvvmCreateProgram - data["__nvvmCreateProgram"] = __nvvmCreateProgram + data["__nvvmCreateProgram"] = __nvvmCreateProgram global __nvvmDestroyProgram - data["__nvvmDestroyProgram"] = __nvvmDestroyProgram + data["__nvvmDestroyProgram"] = __nvvmDestroyProgram global __nvvmAddModuleToProgram - data["__nvvmAddModuleToProgram"] = __nvvmAddModuleToProgram + data["__nvvmAddModuleToProgram"] = __nvvmAddModuleToProgram global __nvvmLazyAddModuleToProgram - data["__nvvmLazyAddModuleToProgram"] = __nvvmLazyAddModuleToProgram + data["__nvvmLazyAddModuleToProgram"] = __nvvmLazyAddModuleToProgram global __nvvmCompileProgram - data["__nvvmCompileProgram"] = __nvvmCompileProgram + data["__nvvmCompileProgram"] = __nvvmCompileProgram global __nvvmVerifyProgram - data["__nvvmVerifyProgram"] = __nvvmVerifyProgram + data["__nvvmVerifyProgram"] = __nvvmVerifyProgram global __nvvmGetCompiledResultSize - data["__nvvmGetCompiledResultSize"] = __nvvmGetCompiledResultSize + data["__nvvmGetCompiledResultSize"] = __nvvmGetCompiledResultSize global __nvvmGetCompiledResult - data["__nvvmGetCompiledResult"] = __nvvmGetCompiledResult + data["__nvvmGetCompiledResult"] = __nvvmGetCompiledResult global __nvvmGetProgramLogSize - data["__nvvmGetProgramLogSize"] = __nvvmGetProgramLogSize + data["__nvvmGetProgramLogSize"] = __nvvmGetProgramLogSize global __nvvmGetProgramLog - data["__nvvmGetProgramLog"] = 
__nvvmGetProgramLog + data["__nvvmGetProgramLog"] = __nvvmGetProgramLog func_ptrs = data return data diff --git a/cuda_bindings/cuda/bindings/_path_finder_utils/load_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder_utils/load_nvidia_dynamic_library.py index 2110e0faf..efa2f2796 100644 --- a/cuda_bindings/cuda/bindings/_path_finder_utils/load_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder_utils/load_nvidia_dynamic_library.py @@ -16,10 +16,10 @@ def load_nvidia_dynamic_library(name: str) -> int: raise ctypes.WinError(ctypes.get_last_error()) except Exception as e: raise RuntimeError(f"Failed to load DLL at {dl_path}: {e}") from e - return handle + return ctypes.c_size_t(handle).value # Ensures unsigned result else: try: handle = ctypes.CDLL(dl_path, mode=os.RTLD_NOW | os.RTLD_GLOBAL) - return handle._handle # Raw void* as int + return handle._handle # Raw void* as unsigned int except OSError as e: raise RuntimeError(f"Failed to dlopen {dl_path}: {e}") from e From 19c50f87bce8971b12889c847e605ca003cf6d6e Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Wed, 2 Apr 2025 14:11:32 -0700 Subject: [PATCH 45/69] Change win32api.GetProcAddress` back to `intptr_t`. Changing load_nvidia_dynamic_library() to also use to-`intptr_t` conversion, for compatibility with win32api.GetProcAddress. Document that CDLL behaves differently (it uses to-`uintptr_t`). 
--- .../bindings/_internal/nvjitlink_windows.pyx | 66 +++++++++---------- .../cuda/bindings/_internal/nvvm_windows.pyx | 58 ++++++++-------- .../load_nvidia_dynamic_library.py | 6 +- 3 files changed, 66 insertions(+), 64 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx index 04e08056b..c007e9941 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx @@ -4,7 +4,7 @@ # # This code was automatically generated across versions from 12.0.1 to 12.6.2. Do not modify it directly. -from libc.stdint cimport uintptr_t +from libc.stdint cimport intptr_t from .utils import FunctionNotFoundError, NotSupportedError @@ -41,7 +41,7 @@ cdef void* __nvJitLinkVersion = NULL cdef void* load_library(int driver_ver) except* with gil: - cdef uintptr_t handle = path_finder.load_nvidia_dynamic_library("nvJitLink") + cdef intptr_t handle = path_finder.load_nvidia_dynamic_library("nvJitLink") return handle @@ -51,7 +51,7 @@ cdef int _check_or_init_nvjitlink() except -1 nogil: return 0 cdef int err, driver_ver - cdef uintptr_t handle + cdef intptr_t handle with gil: # Load driver to check version try: @@ -60,7 +60,7 @@ cdef int _check_or_init_nvjitlink() except -1 nogil: raise NotSupportedError(f'CUDA driver is not found ({e})') global __cuDriverGetVersion if __cuDriverGetVersion == NULL: - __cuDriverGetVersion = win32api.GetProcAddress(nvcuda_handle, 'cuDriverGetVersion') + __cuDriverGetVersion = win32api.GetProcAddress(nvcuda_handle, 'cuDriverGetVersion') if __cuDriverGetVersion == NULL: raise RuntimeError('something went wrong') err = (__cuDriverGetVersion)(&driver_ver) @@ -68,90 +68,90 @@ cdef int _check_or_init_nvjitlink() except -1 nogil: raise RuntimeError('something went wrong') # Load library - handle = load_library(driver_ver) + handle = load_library(driver_ver) # Load function global __nvJitLinkCreate 
try: - __nvJitLinkCreate = win32api.GetProcAddress(handle, 'nvJitLinkCreate') + __nvJitLinkCreate = win32api.GetProcAddress(handle, 'nvJitLinkCreate') except: pass global __nvJitLinkDestroy try: - __nvJitLinkDestroy = win32api.GetProcAddress(handle, 'nvJitLinkDestroy') + __nvJitLinkDestroy = win32api.GetProcAddress(handle, 'nvJitLinkDestroy') except: pass global __nvJitLinkAddData try: - __nvJitLinkAddData = win32api.GetProcAddress(handle, 'nvJitLinkAddData') + __nvJitLinkAddData = win32api.GetProcAddress(handle, 'nvJitLinkAddData') except: pass global __nvJitLinkAddFile try: - __nvJitLinkAddFile = win32api.GetProcAddress(handle, 'nvJitLinkAddFile') + __nvJitLinkAddFile = win32api.GetProcAddress(handle, 'nvJitLinkAddFile') except: pass global __nvJitLinkComplete try: - __nvJitLinkComplete = win32api.GetProcAddress(handle, 'nvJitLinkComplete') + __nvJitLinkComplete = win32api.GetProcAddress(handle, 'nvJitLinkComplete') except: pass global __nvJitLinkGetLinkedCubinSize try: - __nvJitLinkGetLinkedCubinSize = win32api.GetProcAddress(handle, 'nvJitLinkGetLinkedCubinSize') + __nvJitLinkGetLinkedCubinSize = win32api.GetProcAddress(handle, 'nvJitLinkGetLinkedCubinSize') except: pass global __nvJitLinkGetLinkedCubin try: - __nvJitLinkGetLinkedCubin = win32api.GetProcAddress(handle, 'nvJitLinkGetLinkedCubin') + __nvJitLinkGetLinkedCubin = win32api.GetProcAddress(handle, 'nvJitLinkGetLinkedCubin') except: pass global __nvJitLinkGetLinkedPtxSize try: - __nvJitLinkGetLinkedPtxSize = win32api.GetProcAddress(handle, 'nvJitLinkGetLinkedPtxSize') + __nvJitLinkGetLinkedPtxSize = win32api.GetProcAddress(handle, 'nvJitLinkGetLinkedPtxSize') except: pass global __nvJitLinkGetLinkedPtx try: - __nvJitLinkGetLinkedPtx = win32api.GetProcAddress(handle, 'nvJitLinkGetLinkedPtx') + __nvJitLinkGetLinkedPtx = win32api.GetProcAddress(handle, 'nvJitLinkGetLinkedPtx') except: pass global __nvJitLinkGetErrorLogSize try: - __nvJitLinkGetErrorLogSize = win32api.GetProcAddress(handle, 
'nvJitLinkGetErrorLogSize') + __nvJitLinkGetErrorLogSize = win32api.GetProcAddress(handle, 'nvJitLinkGetErrorLogSize') except: pass global __nvJitLinkGetErrorLog try: - __nvJitLinkGetErrorLog = win32api.GetProcAddress(handle, 'nvJitLinkGetErrorLog') + __nvJitLinkGetErrorLog = win32api.GetProcAddress(handle, 'nvJitLinkGetErrorLog') except: pass global __nvJitLinkGetInfoLogSize try: - __nvJitLinkGetInfoLogSize = win32api.GetProcAddress(handle, 'nvJitLinkGetInfoLogSize') + __nvJitLinkGetInfoLogSize = win32api.GetProcAddress(handle, 'nvJitLinkGetInfoLogSize') except: pass global __nvJitLinkGetInfoLog try: - __nvJitLinkGetInfoLog = win32api.GetProcAddress(handle, 'nvJitLinkGetInfoLog') + __nvJitLinkGetInfoLog = win32api.GetProcAddress(handle, 'nvJitLinkGetInfoLog') except: pass global __nvJitLinkVersion try: - __nvJitLinkVersion = win32api.GetProcAddress(handle, 'nvJitLinkVersion') + __nvJitLinkVersion = win32api.GetProcAddress(handle, 'nvJitLinkVersion') except: pass @@ -171,46 +171,46 @@ cpdef dict _inspect_function_pointers(): cdef dict data = {} global __nvJitLinkCreate - data["__nvJitLinkCreate"] = __nvJitLinkCreate + data["__nvJitLinkCreate"] = __nvJitLinkCreate global __nvJitLinkDestroy - data["__nvJitLinkDestroy"] = __nvJitLinkDestroy + data["__nvJitLinkDestroy"] = __nvJitLinkDestroy global __nvJitLinkAddData - data["__nvJitLinkAddData"] = __nvJitLinkAddData + data["__nvJitLinkAddData"] = __nvJitLinkAddData global __nvJitLinkAddFile - data["__nvJitLinkAddFile"] = __nvJitLinkAddFile + data["__nvJitLinkAddFile"] = __nvJitLinkAddFile global __nvJitLinkComplete - data["__nvJitLinkComplete"] = __nvJitLinkComplete + data["__nvJitLinkComplete"] = __nvJitLinkComplete global __nvJitLinkGetLinkedCubinSize - data["__nvJitLinkGetLinkedCubinSize"] = __nvJitLinkGetLinkedCubinSize + data["__nvJitLinkGetLinkedCubinSize"] = __nvJitLinkGetLinkedCubinSize global __nvJitLinkGetLinkedCubin - data["__nvJitLinkGetLinkedCubin"] = __nvJitLinkGetLinkedCubin + 
data["__nvJitLinkGetLinkedCubin"] = __nvJitLinkGetLinkedCubin global __nvJitLinkGetLinkedPtxSize - data["__nvJitLinkGetLinkedPtxSize"] = __nvJitLinkGetLinkedPtxSize + data["__nvJitLinkGetLinkedPtxSize"] = __nvJitLinkGetLinkedPtxSize global __nvJitLinkGetLinkedPtx - data["__nvJitLinkGetLinkedPtx"] = __nvJitLinkGetLinkedPtx + data["__nvJitLinkGetLinkedPtx"] = __nvJitLinkGetLinkedPtx global __nvJitLinkGetErrorLogSize - data["__nvJitLinkGetErrorLogSize"] = __nvJitLinkGetErrorLogSize + data["__nvJitLinkGetErrorLogSize"] = __nvJitLinkGetErrorLogSize global __nvJitLinkGetErrorLog - data["__nvJitLinkGetErrorLog"] = __nvJitLinkGetErrorLog + data["__nvJitLinkGetErrorLog"] = __nvJitLinkGetErrorLog global __nvJitLinkGetInfoLogSize - data["__nvJitLinkGetInfoLogSize"] = __nvJitLinkGetInfoLogSize + data["__nvJitLinkGetInfoLogSize"] = __nvJitLinkGetInfoLogSize global __nvJitLinkGetInfoLog - data["__nvJitLinkGetInfoLog"] = __nvJitLinkGetInfoLog + data["__nvJitLinkGetInfoLog"] = __nvJitLinkGetInfoLog global __nvJitLinkVersion - data["__nvJitLinkVersion"] = __nvJitLinkVersion + data["__nvJitLinkVersion"] = __nvJitLinkVersion func_ptrs = data return data diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx index a858977da..b9662ff76 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx @@ -4,7 +4,7 @@ # # This code was automatically generated across versions from 11.0.3 to 12.8.0. Do not modify it directly. 
-from libc.stdint cimport uintptr_t +from libc.stdint cimport intptr_t from .utils import FunctionNotFoundError, NotSupportedError @@ -39,7 +39,7 @@ cdef void* __nvvmGetProgramLog = NULL cdef void* load_library(int driver_ver) except* with gil: - cdef uintptr_t handle = path_finder.load_nvidia_dynamic_library("nvvm") + cdef intptr_t handle = path_finder.load_nvidia_dynamic_library("nvvm") return handle @@ -49,7 +49,7 @@ cdef int _check_or_init_nvvm() except -1 nogil: return 0 cdef int err, driver_ver - cdef uintptr_t handle + cdef intptr_t handle with gil: # Load driver to check version try: @@ -58,7 +58,7 @@ cdef int _check_or_init_nvvm() except -1 nogil: raise NotSupportedError(f'CUDA driver is not found ({e})') global __cuDriverGetVersion if __cuDriverGetVersion == NULL: - __cuDriverGetVersion = win32api.GetProcAddress(nvcuda_handle, 'cuDriverGetVersion') + __cuDriverGetVersion = win32api.GetProcAddress(nvcuda_handle, 'cuDriverGetVersion') if __cuDriverGetVersion == NULL: raise RuntimeError('something went wrong') err = (__cuDriverGetVersion)(&driver_ver) @@ -66,78 +66,78 @@ cdef int _check_or_init_nvvm() except -1 nogil: raise RuntimeError('something went wrong') # Load library - handle = load_library(driver_ver) + handle = load_library(driver_ver) # Load function global __nvvmVersion try: - __nvvmVersion = win32api.GetProcAddress(handle, 'nvvmVersion') + __nvvmVersion = win32api.GetProcAddress(handle, 'nvvmVersion') except: pass global __nvvmIRVersion try: - __nvvmIRVersion = win32api.GetProcAddress(handle, 'nvvmIRVersion') + __nvvmIRVersion = win32api.GetProcAddress(handle, 'nvvmIRVersion') except: pass global __nvvmCreateProgram try: - __nvvmCreateProgram = win32api.GetProcAddress(handle, 'nvvmCreateProgram') + __nvvmCreateProgram = win32api.GetProcAddress(handle, 'nvvmCreateProgram') except: pass global __nvvmDestroyProgram try: - __nvvmDestroyProgram = win32api.GetProcAddress(handle, 'nvvmDestroyProgram') + __nvvmDestroyProgram = 
win32api.GetProcAddress(handle, 'nvvmDestroyProgram') except: pass global __nvvmAddModuleToProgram try: - __nvvmAddModuleToProgram = win32api.GetProcAddress(handle, 'nvvmAddModuleToProgram') + __nvvmAddModuleToProgram = win32api.GetProcAddress(handle, 'nvvmAddModuleToProgram') except: pass global __nvvmLazyAddModuleToProgram try: - __nvvmLazyAddModuleToProgram = win32api.GetProcAddress(handle, 'nvvmLazyAddModuleToProgram') + __nvvmLazyAddModuleToProgram = win32api.GetProcAddress(handle, 'nvvmLazyAddModuleToProgram') except: pass global __nvvmCompileProgram try: - __nvvmCompileProgram = win32api.GetProcAddress(handle, 'nvvmCompileProgram') + __nvvmCompileProgram = win32api.GetProcAddress(handle, 'nvvmCompileProgram') except: pass global __nvvmVerifyProgram try: - __nvvmVerifyProgram = win32api.GetProcAddress(handle, 'nvvmVerifyProgram') + __nvvmVerifyProgram = win32api.GetProcAddress(handle, 'nvvmVerifyProgram') except: pass global __nvvmGetCompiledResultSize try: - __nvvmGetCompiledResultSize = win32api.GetProcAddress(handle, 'nvvmGetCompiledResultSize') + __nvvmGetCompiledResultSize = win32api.GetProcAddress(handle, 'nvvmGetCompiledResultSize') except: pass global __nvvmGetCompiledResult try: - __nvvmGetCompiledResult = win32api.GetProcAddress(handle, 'nvvmGetCompiledResult') + __nvvmGetCompiledResult = win32api.GetProcAddress(handle, 'nvvmGetCompiledResult') except: pass global __nvvmGetProgramLogSize try: - __nvvmGetProgramLogSize = win32api.GetProcAddress(handle, 'nvvmGetProgramLogSize') + __nvvmGetProgramLogSize = win32api.GetProcAddress(handle, 'nvvmGetProgramLogSize') except: pass global __nvvmGetProgramLog try: - __nvvmGetProgramLog = win32api.GetProcAddress(handle, 'nvvmGetProgramLog') + __nvvmGetProgramLog = win32api.GetProcAddress(handle, 'nvvmGetProgramLog') except: pass @@ -157,40 +157,40 @@ cpdef dict _inspect_function_pointers(): cdef dict data = {} global __nvvmVersion - data["__nvvmVersion"] = __nvvmVersion + data["__nvvmVersion"] = __nvvmVersion 
global __nvvmIRVersion - data["__nvvmIRVersion"] = __nvvmIRVersion + data["__nvvmIRVersion"] = __nvvmIRVersion global __nvvmCreateProgram - data["__nvvmCreateProgram"] = __nvvmCreateProgram + data["__nvvmCreateProgram"] = __nvvmCreateProgram global __nvvmDestroyProgram - data["__nvvmDestroyProgram"] = __nvvmDestroyProgram + data["__nvvmDestroyProgram"] = __nvvmDestroyProgram global __nvvmAddModuleToProgram - data["__nvvmAddModuleToProgram"] = __nvvmAddModuleToProgram + data["__nvvmAddModuleToProgram"] = __nvvmAddModuleToProgram global __nvvmLazyAddModuleToProgram - data["__nvvmLazyAddModuleToProgram"] = __nvvmLazyAddModuleToProgram + data["__nvvmLazyAddModuleToProgram"] = __nvvmLazyAddModuleToProgram global __nvvmCompileProgram - data["__nvvmCompileProgram"] = __nvvmCompileProgram + data["__nvvmCompileProgram"] = __nvvmCompileProgram global __nvvmVerifyProgram - data["__nvvmVerifyProgram"] = __nvvmVerifyProgram + data["__nvvmVerifyProgram"] = __nvvmVerifyProgram global __nvvmGetCompiledResultSize - data["__nvvmGetCompiledResultSize"] = __nvvmGetCompiledResultSize + data["__nvvmGetCompiledResultSize"] = __nvvmGetCompiledResultSize global __nvvmGetCompiledResult - data["__nvvmGetCompiledResult"] = __nvvmGetCompiledResult + data["__nvvmGetCompiledResult"] = __nvvmGetCompiledResult global __nvvmGetProgramLogSize - data["__nvvmGetProgramLogSize"] = __nvvmGetProgramLogSize + data["__nvvmGetProgramLogSize"] = __nvvmGetProgramLogSize global __nvvmGetProgramLog - data["__nvvmGetProgramLog"] = __nvvmGetProgramLog + data["__nvvmGetProgramLog"] = __nvvmGetProgramLog func_ptrs = data return data diff --git a/cuda_bindings/cuda/bindings/_path_finder_utils/load_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder_utils/load_nvidia_dynamic_library.py index efa2f2796..90ada56c7 100644 --- a/cuda_bindings/cuda/bindings/_path_finder_utils/load_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder_utils/load_nvidia_dynamic_library.py @@ -16,10 +16,12 
@@ def load_nvidia_dynamic_library(name: str) -> int: raise ctypes.WinError(ctypes.get_last_error()) except Exception as e: raise RuntimeError(f"Failed to load DLL at {dl_path}: {e}") from e - return ctypes.c_size_t(handle).value # Ensures unsigned result + # Use `cdef void* ptr = ` in cython to convert back to void* + return handle # C signed int, matches win32api.GetProcAddress else: try: handle = ctypes.CDLL(dl_path, mode=os.RTLD_NOW | os.RTLD_GLOBAL) - return handle._handle # Raw void* as unsigned int except OSError as e: raise RuntimeError(f"Failed to dlopen {dl_path}: {e}") from e + # Use `cdef void* ptr = ` in cython to convert back to void* + return handle._handle # C unsigned int From 93a66b0e02a31543a89f489a867785aa6f8774a7 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Wed, 2 Apr 2025 15:14:21 -0700 Subject: [PATCH 46/69] Use win32api.LoadLibrary() instead of ctypes.windll.kernel32.LoadLibraryW(), to be more similar to original (and working) cython code. Hoping to resolve this kind of error: ``` _ ERROR at setup of test_c_or_v_program_fail_bad_option[txt-compile_program] __ request = > @pytest.fixture(params=MINIMAL_NVVMIR_FIXTURE_PARAMS) def minimal_nvvmir(request): for pass_counter in range(2): nvvmir = MINIMAL_NVVMIR_CACHE.get(request.param, -1) if nvvmir != -1: if nvvmir is None: pytest.skip(f"UNAVAILABLE: {request.param}") return nvvmir if pass_counter: raise AssertionError("This code path is meant to be unreachable.") # Build cache entries, then try again (above). 
> major, minor, debug_major, debug_minor = nvvm.ir_version() tests\test_nvvm.py:148: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ cuda\bindings\nvvm.pyx:95: in cuda.bindings.nvvm.ir_version cpdef tuple ir_version(): cuda\bindings\nvvm.pyx:113: in cuda.bindings.nvvm.ir_version status = nvvmIRVersion(&major_ir, &minor_ir, &major_dbg, &minor_dbg) cuda\bindings\cynvvm.pyx:19: in cuda.bindings.cynvvm.nvvmIRVersion return _nvvm._nvvmIRVersion(majorIR, minorIR, majorDbg, minorDbg) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ > ??? E cuda.bindings._internal.utils.FunctionNotFoundError: function nvvmIRVersion is not found ``` --- .../load_nvidia_dynamic_library.py | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_path_finder_utils/load_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder_utils/load_nvidia_dynamic_library.py index 90ada56c7..d3514bc0b 100644 --- a/cuda_bindings/cuda/bindings/_path_finder_utils/load_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder_utils/load_nvidia_dynamic_library.py @@ -1,21 +1,26 @@ -import ctypes import functools -import os import sys +if sys.platform == "win32": + import pywintypes + import win32api +else: + import ctypes + import os + from .find_nvidia_dynamic_library import find_nvidia_dynamic_library @functools.cache def load_nvidia_dynamic_library(name: str) -> int: + print(f"\nLOOOK load_nvidia_dynamic_library({name}) ENTRY", flush=True) dl_path = find_nvidia_dynamic_library(name) if sys.platform == "win32": try: - handle = ctypes.windll.kernel32.LoadLibraryW(dl_path) - if not handle: - raise ctypes.WinError(ctypes.get_last_error()) - except Exception as e: + handle = win32api.LoadLibrary(dl_path) + except pywintypes.error as e: raise RuntimeError(f"Failed to load DLL at {dl_path}: {e}") from e + print(f"\nLOOOK load_nvidia_dynamic_library({name}) RETURN 
{type(handle)=} {handle=}", flush=True) # Use `cdef void* ptr = ` in cython to convert back to void* return handle # C signed int, matches win32api.GetProcAddress else: @@ -23,5 +28,8 @@ def load_nvidia_dynamic_library(name: str) -> int: handle = ctypes.CDLL(dl_path, mode=os.RTLD_NOW | os.RTLD_GLOBAL) except OSError as e: raise RuntimeError(f"Failed to dlopen {dl_path}: {e}") from e + print( + f"\nLOOOK load_nvidia_dynamic_library({name}) RETURN {type(handle._handle)=} {handle._handle=}", flush=True + ) # Use `cdef void* ptr = ` in cython to convert back to void* return handle._handle # C unsigned int From 1944e4c7fd66d0b2fe25603d7efbc32163f8b748 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Wed, 2 Apr 2025 15:40:28 -0700 Subject: [PATCH 47/69] Remove debug print statements. --- .../_path_finder_utils/load_nvidia_dynamic_library.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_path_finder_utils/load_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder_utils/load_nvidia_dynamic_library.py index d3514bc0b..14d17c2b8 100644 --- a/cuda_bindings/cuda/bindings/_path_finder_utils/load_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder_utils/load_nvidia_dynamic_library.py @@ -13,14 +13,12 @@ @functools.cache def load_nvidia_dynamic_library(name: str) -> int: - print(f"\nLOOOK load_nvidia_dynamic_library({name}) ENTRY", flush=True) dl_path = find_nvidia_dynamic_library(name) if sys.platform == "win32": try: handle = win32api.LoadLibrary(dl_path) except pywintypes.error as e: raise RuntimeError(f"Failed to load DLL at {dl_path}: {e}") from e - print(f"\nLOOOK load_nvidia_dynamic_library({name}) RETURN {type(handle)=} {handle=}", flush=True) # Use `cdef void* ptr = ` in cython to convert back to void* return handle # C signed int, matches win32api.GetProcAddress else: @@ -28,8 +26,5 @@ def load_nvidia_dynamic_library(name: str) -> int: handle = ctypes.CDLL(dl_path, mode=os.RTLD_NOW | 
os.RTLD_GLOBAL) except OSError as e: raise RuntimeError(f"Failed to dlopen {dl_path}: {e}") from e - print( - f"\nLOOOK load_nvidia_dynamic_library({name}) RETURN {type(handle._handle)=} {handle._handle=}", flush=True - ) # Use `cdef void* ptr = ` in cython to convert back to void* return handle._handle # C unsigned int From 28cd33428860c13dcc24a9c51bc125abc8c376d3 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Wed, 2 Apr 2025 16:12:49 -0700 Subject: [PATCH 48/69] Remove some cruft. --- .../cuda/bindings/_internal/nvjitlink_linux.pyx | 2 -- .../cuda/bindings/_internal/nvjitlink_windows.pyx | 1 - .../cuda/bindings/_internal/nvvm_windows.pyx | 1 - cuda_bindings/cuda/bindings/_internal/utils.pxd | 3 --- cuda_bindings/cuda/bindings/_internal/utils.pyx | 14 -------------- 5 files changed, 21 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx index 2f98dde0a..00ac64362 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx @@ -4,8 +4,6 @@ # # This code was automatically generated across versions from 12.0.1 to 12.6.2. Do not modify it directly. 
-import os - from libc.stdint cimport intptr_t, uintptr_t from .utils import FunctionNotFoundError, NotSupportedError diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx index c007e9941..d6ae51ebc 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx @@ -10,7 +10,6 @@ from .utils import FunctionNotFoundError, NotSupportedError from cuda.bindings import path_finder -import os import win32api diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx index b9662ff76..acde44adb 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx @@ -10,7 +10,6 @@ from .utils import FunctionNotFoundError, NotSupportedError from cuda.bindings import path_finder -import os import win32api diff --git a/cuda_bindings/cuda/bindings/_internal/utils.pxd b/cuda_bindings/cuda/bindings/_internal/utils.pxd index cac7846ff..a4b71c531 100644 --- a/cuda_bindings/cuda/bindings/_internal/utils.pxd +++ b/cuda_bindings/cuda/bindings/_internal/utils.pxd @@ -165,6 +165,3 @@ cdef int get_nested_resource_ptr(nested_resource[ResT] &in_out_ptr, object obj, cdef bint is_nested_sequence(data) cdef void* get_buffer_pointer(buf, Py_ssize_t size, readonly=*) except* - -cdef tuple get_nvjitlink_dso_version_suffix(int driver_ver) -cdef tuple get_nvvm_dso_version_suffix(int driver_ver) diff --git a/cuda_bindings/cuda/bindings/_internal/utils.pyx b/cuda_bindings/cuda/bindings/_internal/utils.pyx index 0a693c052..7fc77b22c 100644 --- a/cuda_bindings/cuda/bindings/_internal/utils.pyx +++ b/cuda_bindings/cuda/bindings/_internal/utils.pyx @@ -127,17 +127,3 @@ cdef int get_nested_resource_ptr(nested_resource[ResT] &in_out_ptr, object obj, class FunctionNotFoundError(RuntimeError): pass class 
NotSupportedError(RuntimeError): pass - - -cdef tuple get_nvjitlink_dso_version_suffix(int driver_ver): - if 12000 <= driver_ver < 13000: - return ('12', '') - raise NotSupportedError(f'CUDA driver version {driver_ver} is not supported') - - -cdef tuple get_nvvm_dso_version_suffix(int driver_ver): - if 11000 <= driver_ver < 11020: - return ('3', '') - if 11020 <= driver_ver < 13000: - return ('4', '') - raise NotSupportedError(f'CUDA driver version {driver_ver} is not supported') From 491cda12049f5c02215c9bda00a0fa17f0135ba9 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Wed, 2 Apr 2025 17:15:47 -0700 Subject: [PATCH 49/69] Trivial renaming of variables. No functional changes. --- .../find_nvidia_dynamic_library.py | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_path_finder_utils/find_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder_utils/find_nvidia_dynamic_library.py index 44c0e78f5..30a9b68f4 100644 --- a/cuda_bindings/cuda/bindings/_path_finder_utils/find_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder_utils/find_nvidia_dynamic_library.py @@ -18,8 +18,8 @@ def _no_such_file_in_sub_dirs(sub_dirs, file_wild, error_messages, attachments): attachments.append(f" {node}") -def _find_so_using_nvidia_lib_dirs(libbasename, so_basename, error_messages, attachments): - if libbasename == "nvvm": # noqa: SIM108 +def _find_so_using_nvidia_lib_dirs(libname, so_basename, error_messages, attachments): + if libname == "nvvm": # noqa: SIM108 nvidia_sub_dirs = ("nvidia", "*", "nvvm", "lib64") else: nvidia_sub_dirs = ("nvidia", "*", "lib") @@ -39,12 +39,12 @@ def _find_so_using_nvidia_lib_dirs(libbasename, so_basename, error_messages, att return None -def _find_dll_using_nvidia_bin_dirs(libbasename, error_messages, attachments): - if libbasename == "nvvm": # noqa: SIM108 +def _find_dll_using_nvidia_bin_dirs(libname, error_messages, attachments): + if 
libname == "nvvm": # noqa: SIM108 nvidia_sub_dirs = ("nvidia", "*", "nvvm", "bin") else: nvidia_sub_dirs = ("nvidia", "*", "bin") - file_wild = libbasename + "*.dll" + file_wild = libname + "*.dll" for bin_dir in sys_path_find_sub_dirs(nvidia_sub_dirs): for node in sorted(glob.glob(os.path.join(bin_dir, file_wild))): dll_name = os.path.join(bin_dir, node) @@ -93,11 +93,11 @@ def _find_so_using_cudalib_dir(so_basename, error_messages, attachments): return None -def _find_dll_using_cudalib_dir(libbasename, error_messages, attachments): +def _find_dll_using_cudalib_dir(libname, error_messages, attachments): cudalib_dir = _get_cuda_paths_info("cudalib_dir", error_messages) if cudalib_dir is None: return None - file_wild = libbasename + "*.dll" + file_wild = libname + "*.dll" for node in sorted(glob.glob(os.path.join(cudalib_dir, file_wild))): dll_name = os.path.join(cudalib_dir, node) if os.path.isfile(dll_name): @@ -110,26 +110,26 @@ def _find_dll_using_cudalib_dir(libbasename, error_messages, attachments): @functools.cache -def find_nvidia_dynamic_library(libbasename): +def find_nvidia_dynamic_library(name: str) -> str: error_messages = [] attachments = [] if IS_WIN32: - dll_name = _find_dll_using_nvidia_bin_dirs(libbasename, error_messages, attachments) + dll_name = _find_dll_using_nvidia_bin_dirs(name, error_messages, attachments) if dll_name is None: - if libbasename == "nvvm": + if name == "nvvm": dll_name = _get_cuda_paths_info("nvvm", error_messages) else: - dll_name = _find_dll_using_cudalib_dir(libbasename, error_messages, attachments) + dll_name = _find_dll_using_cudalib_dir(name, error_messages, attachments) if dll_name is None: attachments = "\n".join(attachments) - raise RuntimeError(f"Failure finding {libbasename}*.dll: {', '.join(error_messages)}\n{attachments}") + raise RuntimeError(f"Failure finding {name}*.dll: {', '.join(error_messages)}\n{attachments}") return dll_name - so_basename = f"lib{libbasename}.so" - so_name = 
_find_so_using_nvidia_lib_dirs(libbasename, so_basename, error_messages, attachments) + so_basename = f"lib{name}.so" + so_name = _find_so_using_nvidia_lib_dirs(name, so_basename, error_messages, attachments) if so_name is None: - if libbasename == "nvvm": + if name == "nvvm": so_name = _get_cuda_paths_info("nvvm", error_messages) else: so_name = _find_so_using_cudalib_dir(so_basename, error_messages, attachments) From cc6113cce20c5c6124d0676daeccb7db2fffd798 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Wed, 2 Apr 2025 17:17:21 -0700 Subject: [PATCH 50/69] Revert debug changes under .github/workflows --- .github/workflows/build-and-test.yml | 10 ++++++++++ .github/workflows/test-wheel-linux.yml | 8 ++++---- .github/workflows/test-wheel-windows.yml | 4 ++-- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index b33bbcf65..288a5624b 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -21,9 +21,14 @@ jobs: matrix: host-platform: - linux-64 + - linux-aarch64 - win-64 python-version: + - "3.13" - "3.12" + - "3.11" + - "3.10" + - "3.9" cuda-version: # Note: this is for build-time only. - "12.8.0" @@ -206,8 +211,13 @@ jobs: matrix: host-platform: - linux-64 + - linux-aarch64 python-version: + - "3.13" - "3.12" + - "3.11" + - "3.10" + - "3.9" cuda-version: # Note: this is for test-time only. - "12.8.0" diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index 19c78c8cc..322f859e3 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -194,7 +194,7 @@ jobs: pushd ./cuda_bindings pip install -r requirements.txt - pytest -ra -s -v tests/ + pytest -rxXs -v tests/ # It is a bit convoluted to run the Cython tests against CTK wheels, # so let's just skip them. 
@@ -205,7 +205,7 @@ jobs: # TODO: enable this once win-64 runners are up exit 1 fi - pytest -ra -s -v tests/cython + pytest -rxXs -v tests/cython fi popd @@ -229,7 +229,7 @@ jobs: pushd ./cuda_core pip install -r "tests/requirements-cu${TEST_CUDA_MAJOR}.txt" - pytest -ra -s -v tests/ + pytest -rxXs -v tests/ # It is a bit convoluted to run the Cython tests against CTK wheels, # so let's just skip them. Also, currently our CI always installs the @@ -243,7 +243,7 @@ jobs: # TODO: enable this once win-64 runners are up exit 1 fi - pytest -ra -s -v tests/cython + pytest -rxXs -v tests/cython fi popd diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 233f56e4f..4e48590a3 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -186,7 +186,7 @@ jobs: Push-Location ./cuda_bindings pip install -r requirements.txt - pytest -ra -s -v tests/ + pytest -rxXs -v tests/ # skip Cython tests for now (NVIDIA/cuda-python#466) Pop-Location @@ -210,7 +210,7 @@ jobs: Push-Location ./cuda_core pip install -r "tests/requirements-cu${TEST_CUDA_MAJOR}.txt" - pytest -ra -s -v tests/ + pytest -rxXs -v tests/ Pop-Location - name: Ensure cuda-python installable From eaeb8365d404076ed1a92f80172f5563ac8929e5 Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Wed, 2 Apr 2025 20:46:40 -0700 Subject: [PATCH 51/69] =?UTF-8?q?Rename=20=5Fpath=5Ffinder=5Futils=20?= =?UTF-8?q?=E2=86=92=20=5Fpath=5Ffinder?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../{_path_finder_utils => _path_finder}/cuda_paths.py | 0 .../find_nvidia_dynamic_library.py | 0 .../{_path_finder_utils => _path_finder}/findlib.py | 0 .../load_nvidia_dynamic_library.py | 0 .../sys_path_find_sub_dirs.py | 0 cuda_bindings/cuda/bindings/path_finder.py | 6 +++--- cuda_bindings/tests/test_sys_path_find_sub_dirs.py | 2 +- 7 files changed, 4 insertions(+), 4 deletions(-) rename cuda_bindings/cuda/bindings/{_path_finder_utils => _path_finder}/cuda_paths.py (100%) rename cuda_bindings/cuda/bindings/{_path_finder_utils => _path_finder}/find_nvidia_dynamic_library.py (100%) rename cuda_bindings/cuda/bindings/{_path_finder_utils => _path_finder}/findlib.py (100%) rename cuda_bindings/cuda/bindings/{_path_finder_utils => _path_finder}/load_nvidia_dynamic_library.py (100%) rename cuda_bindings/cuda/bindings/{_path_finder_utils => _path_finder}/sys_path_find_sub_dirs.py (100%) diff --git a/cuda_bindings/cuda/bindings/_path_finder_utils/cuda_paths.py b/cuda_bindings/cuda/bindings/_path_finder/cuda_paths.py similarity index 100% rename from cuda_bindings/cuda/bindings/_path_finder_utils/cuda_paths.py rename to cuda_bindings/cuda/bindings/_path_finder/cuda_paths.py diff --git a/cuda_bindings/cuda/bindings/_path_finder_utils/find_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py similarity index 100% rename from cuda_bindings/cuda/bindings/_path_finder_utils/find_nvidia_dynamic_library.py rename to cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py diff --git a/cuda_bindings/cuda/bindings/_path_finder_utils/findlib.py b/cuda_bindings/cuda/bindings/_path_finder/findlib.py similarity index 100% rename from 
cuda_bindings/cuda/bindings/_path_finder_utils/findlib.py rename to cuda_bindings/cuda/bindings/_path_finder/findlib.py diff --git a/cuda_bindings/cuda/bindings/_path_finder_utils/load_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py similarity index 100% rename from cuda_bindings/cuda/bindings/_path_finder_utils/load_nvidia_dynamic_library.py rename to cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py diff --git a/cuda_bindings/cuda/bindings/_path_finder_utils/sys_path_find_sub_dirs.py b/cuda_bindings/cuda/bindings/_path_finder/sys_path_find_sub_dirs.py similarity index 100% rename from cuda_bindings/cuda/bindings/_path_finder_utils/sys_path_find_sub_dirs.py rename to cuda_bindings/cuda/bindings/_path_finder/sys_path_find_sub_dirs.py diff --git a/cuda_bindings/cuda/bindings/path_finder.py b/cuda_bindings/cuda/bindings/path_finder.py index 2bb55cb3c..21aeb4b36 100644 --- a/cuda_bindings/cuda/bindings/path_finder.py +++ b/cuda_bindings/cuda/bindings/path_finder.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -from cuda.bindings._path_finder_utils.cuda_paths import ( +from cuda.bindings._path_finder.cuda_paths import ( get_conda_ctk, get_conda_include_dir, get_cuda_home, @@ -16,8 +16,8 @@ get_nvidia_static_cudalib_ctk, get_system_ctk, ) -from cuda.bindings._path_finder_utils.find_nvidia_dynamic_library import find_nvidia_dynamic_library -from cuda.bindings._path_finder_utils.load_nvidia_dynamic_library import load_nvidia_dynamic_library +from cuda.bindings._path_finder.find_nvidia_dynamic_library import find_nvidia_dynamic_library +from cuda.bindings._path_finder.load_nvidia_dynamic_library import load_nvidia_dynamic_library __all__ = [ "find_nvidia_dynamic_library", diff --git a/cuda_bindings/tests/test_sys_path_find_sub_dirs.py b/cuda_bindings/tests/test_sys_path_find_sub_dirs.py index 6bb958b95..3297ce39e 100644 --- 
a/cuda_bindings/tests/test_sys_path_find_sub_dirs.py +++ b/cuda_bindings/tests/test_sys_path_find_sub_dirs.py @@ -2,7 +2,7 @@ import pytest -from cuda.bindings._path_finder_utils.sys_path_find_sub_dirs import _impl +from cuda.bindings._path_finder.sys_path_find_sub_dirs import _impl @pytest.fixture From 1b1139cda8b56f2fa37c5c0102ee7fe6b5963cab Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Fri, 4 Apr 2025 12:41:00 -0700 Subject: [PATCH 52/69] Remove LD_LIBRARY_PATH in fetch_ctk/action.yml --- .github/actions/fetch_ctk/action.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/actions/fetch_ctk/action.yml b/.github/actions/fetch_ctk/action.yml index 669943296..417da4926 100644 --- a/.github/actions/fetch_ctk/action.yml +++ b/.github/actions/fetch_ctk/action.yml @@ -123,4 +123,3 @@ runs: echo "CUDA_PATH=${CUDA_PATH}" >> $GITHUB_ENV echo "CUDA_HOME=${CUDA_PATH}" >> $GITHUB_ENV echo "${CUDA_PATH}/bin" >> $GITHUB_PATH - echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-}:${CUDA_PATH}/lib:${CUDA_PATH}/nvvm/lib64" >> $GITHUB_ENV From a603ef82436a1c82116ac53c87fda719611d287c Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Fri, 4 Apr 2025 12:53:58 -0700 Subject: [PATCH 53/69] Linux: First try using the platform-specific dynamic loader search mechanisms --- .../_path_finder/load_nvidia_dynamic_library.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py index 14d17c2b8..d48e3f0da 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py @@ -10,9 +10,24 @@ from .find_nvidia_dynamic_library import find_nvidia_dynamic_library +_LINUX_CDLL_MODE = os.RTLD_NOW | os.RTLD_GLOBAL + @functools.cache def load_nvidia_dynamic_library(name: str) -> int: + # First try using the platform-specific dynamic loader search mechanisms + if sys.platform == "win32": + pass # TODO + else: + dl_path = f"lib{name}.so" # Version intentionally no specified. + try: + handle = ctypes.CDLL(dl_path, _LINUX_CDLL_MODE) + except OSError: + pass + else: + # Use `cdef void* ptr = ` in cython to convert back to void* + return handle._handle # C unsigned int + dl_path = find_nvidia_dynamic_library(name) if sys.platform == "win32": try: @@ -23,7 +38,7 @@ def load_nvidia_dynamic_library(name: str) -> int: return handle # C signed int, matches win32api.GetProcAddress else: try: - handle = ctypes.CDLL(dl_path, mode=os.RTLD_NOW | os.RTLD_GLOBAL) + handle = ctypes.CDLL(dl_path, _LINUX_CDLL_MODE) except OSError as e: raise RuntimeError(f"Failed to dlopen {dl_path}: {e}") from e # Use `cdef void* ptr = ` in cython to convert back to void* From daae8e5813ee0cc5fb09b17f599409d392c030fe Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Sat, 5 Apr 2025 12:22:20 -0700 Subject: [PATCH 54/69] Add _windows_load_with_dll_basename() --- .../load_nvidia_dynamic_library.py | 49 ++++++++++++++++++- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py index d48e3f0da..38d23bb54 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py @@ -2,22 +2,67 @@ import sys if sys.platform == "win32": + import ctypes.wintypes + import pywintypes import win32api + + # Mirrors WinBase.h (unfortunately not defined already elsewhere) + _WINBASE_LOAD_LIBRARY_SEARCH_SYSTEM32 = 0x00000800 + else: import ctypes import os + _LINUX_CDLL_MODE = os.RTLD_NOW | os.RTLD_GLOBAL + from .find_nvidia_dynamic_library import find_nvidia_dynamic_library -_LINUX_CDLL_MODE = os.RTLD_NOW | os.RTLD_GLOBAL + +@functools.cache +def _windows_cuDriverGetVersion() -> int: + handle = win32api.LoadLibrary("nvcuda.dll") + + kernel32 = ctypes.WinDLL("kernel32", use_last_error=True) + GetProcAddress = kernel32.GetProcAddress + GetProcAddress.argtypes = [ctypes.wintypes.HMODULE, ctypes.wintypes.LPCSTR] + GetProcAddress.restype = ctypes.c_void_p + cuDriverGetVersion = GetProcAddress(handle, b"cuDriverGetVersion") + assert cuDriverGetVersion + + FUNC_TYPE = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.POINTER(ctypes.c_int)) + cuDriverGetVersion_fn = FUNC_TYPE(cuDriverGetVersion) + driver_ver = ctypes.c_int() + err = cuDriverGetVersion_fn(ctypes.byref(driver_ver)) + assert err == 0 + return driver_ver.value + + +@functools.cache +def _windows_load_with_dll_basename(name: str) -> int: + driver_ver = _windows_cuDriverGetVersion() + del driver_ver # Keeping this here because it will probably be needed in the future. 
+ + if name == "nvJitLink": + dll_name = "nvJitLink_120_0.dll" + elif name == "nvvm": + dll_name = "nvvm64_40_0.dll" + + try: + return win32api.LoadLibrary(dll_name) + except pywintypes.error: + pass + + return None @functools.cache def load_nvidia_dynamic_library(name: str) -> int: # First try using the platform-specific dynamic loader search mechanisms if sys.platform == "win32": - pass # TODO + handle = _windows_load_with_dll_basename(name) + if handle: + return handle else: dl_path = f"lib{name}.so" # Version intentionally no specified. try: From aaa6aff637f6bd076d0b124a39d56eeab5875351 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Sat, 5 Apr 2025 12:33:13 -0700 Subject: [PATCH 55/69] Revert "Revert debug changes under .github/workflows" This reverts commit cc6113cce20c5c6124d0676daeccb7db2fffd798. --- .github/workflows/build-and-test.yml | 10 ---------- .github/workflows/test-wheel-linux.yml | 8 ++++---- .github/workflows/test-wheel-windows.yml | 4 ++-- 3 files changed, 6 insertions(+), 16 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 288a5624b..b33bbcf65 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -21,14 +21,9 @@ jobs: matrix: host-platform: - linux-64 - - linux-aarch64 - win-64 python-version: - - "3.13" - "3.12" - - "3.11" - - "3.10" - - "3.9" cuda-version: # Note: this is for build-time only. - "12.8.0" @@ -211,13 +206,8 @@ jobs: matrix: host-platform: - linux-64 - - linux-aarch64 python-version: - - "3.13" - "3.12" - - "3.11" - - "3.10" - - "3.9" cuda-version: # Note: this is for test-time only. 
- "12.8.0" diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index 322f859e3..19c78c8cc 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -194,7 +194,7 @@ jobs: pushd ./cuda_bindings pip install -r requirements.txt - pytest -rxXs -v tests/ + pytest -ra -s -v tests/ # It is a bit convoluted to run the Cython tests against CTK wheels, # so let's just skip them. @@ -205,7 +205,7 @@ jobs: # TODO: enable this once win-64 runners are up exit 1 fi - pytest -rxXs -v tests/cython + pytest -ra -s -v tests/cython fi popd @@ -229,7 +229,7 @@ jobs: pushd ./cuda_core pip install -r "tests/requirements-cu${TEST_CUDA_MAJOR}.txt" - pytest -rxXs -v tests/ + pytest -ra -s -v tests/ # It is a bit convoluted to run the Cython tests against CTK wheels, # so let's just skip them. Also, currently our CI always installs the @@ -243,7 +243,7 @@ jobs: # TODO: enable this once win-64 runners are up exit 1 fi - pytest -rxXs -v tests/cython + pytest -ra -s -v tests/cython fi popd diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 4e48590a3..233f56e4f 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -186,7 +186,7 @@ jobs: Push-Location ./cuda_bindings pip install -r requirements.txt - pytest -rxXs -v tests/ + pytest -ra -s -v tests/ # skip Cython tests for now (NVIDIA/cuda-python#466) Pop-Location @@ -210,7 +210,7 @@ jobs: Push-Location ./cuda_core pip install -r "tests/requirements-cu${TEST_CUDA_MAJOR}.txt" - pytest -rxXs -v tests/ + pytest -ra -s -v tests/ Pop-Location - name: Ensure cuda-python installable From 69967c7e2d1013bdd0df73fd56af3e586a3d9fbe Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Sat, 5 Apr 2025 20:10:03 -0700 Subject: [PATCH 56/69] Add debug prints in load_nvidia_dynamic_library() --- .../bindings/_path_finder/load_nvidia_dynamic_library.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py index 38d23bb54..004edae47 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py @@ -58,10 +58,12 @@ def _windows_load_with_dll_basename(name: str) -> int: @functools.cache def load_nvidia_dynamic_library(name: str) -> int: + print(f"\nLOOOK load_nvidia_dynamic_library({name=}) ENTRY", flush=True) # First try using the platform-specific dynamic loader search mechanisms if sys.platform == "win32": handle = _windows_load_with_dll_basename(name) if handle: + print(f"\nLOOOK load_nvidia_dynamic_library({name=}): _windows_load_with_dll_basename", flush=True) return handle else: dl_path = f"lib{name}.so" # Version intentionally no specified. 
@@ -71,6 +73,7 @@ def load_nvidia_dynamic_library(name: str) -> int: pass else: # Use `cdef void* ptr = ` in cython to convert back to void* + print(f"\nLOOOK load_nvidia_dynamic_library({name=}): ctypes.CDLL({dl_path=})", flush=True) return handle._handle # C unsigned int dl_path = find_nvidia_dynamic_library(name) @@ -80,6 +83,7 @@ def load_nvidia_dynamic_library(name: str) -> int: except pywintypes.error as e: raise RuntimeError(f"Failed to load DLL at {dl_path}: {e}") from e # Use `cdef void* ptr = ` in cython to convert back to void* + print(f"\nLOOOK load_nvidia_dynamic_library({name=}): win32api.LoadLibrary({dl_path=})", flush=True) return handle # C signed int, matches win32api.GetProcAddress else: try: @@ -87,4 +91,5 @@ def load_nvidia_dynamic_library(name: str) -> int: except OSError as e: raise RuntimeError(f"Failed to dlopen {dl_path}: {e}") from e # Use `cdef void* ptr = ` in cython to convert back to void* + print(f"\nLOOOK load_nvidia_dynamic_library({name=}): ctypes.CDLL({dl_path=})", flush=True) return handle._handle # C unsigned int From db571f49164757c0433910d53926d4fb89f5fa8e Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Sat, 5 Apr 2025 22:45:13 -0700 Subject: [PATCH 57/69] Report dlopen error for libnvrtc.so.12 --- cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in b/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in index a0f8a27a0..6642614e8 100644 --- a/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in +++ b/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in @@ -46,6 +46,10 @@ cdef bint __cuPythonInit = False {{if 'nvrtcSetFlowCallback' in found_functions}}cdef void *__nvrtcSetFlowCallback = NULL{{endif}} cdef int cuPythonInit() except -1 nogil: + {{if 'Windows' != platform.system()}} + cdef char* err_msg + {{endif}} + global __cuPythonInit if __cuPythonInit: return 0 @@ -97,7 +101,12 @@ cdef int cuPythonInit() except -1 nogil: handle = dlfcn.dlopen('libnvrtc.so.12', dlfcn.RTLD_NOW) if handle == NULL: with gil: - raise RuntimeError('Failed to dlopen libnvrtc.so.12') + err_msg = dlfcn.dlerror() + if err_msg == NULL: + err_msg_str = 'Unknown error' + else: + err_msg_str = err_msg.decode('utf-8', errors='backslashreplace') + raise RuntimeError(f'Failed to dlopen libnvrtc.so.12: {err_msg_str}') {{endif}} From 2244556038544bbb6c77eaf8f3e969df546f1dff Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Sat, 5 Apr 2025 23:02:27 -0700 Subject: [PATCH 58/69] print("\nLOOOK dlfcn.dlopen('libnvrtc.so.12', dlfcn.RTLD_NOW)", flush=True) --- cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in b/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in index 6642614e8..ee25dac87 100644 --- a/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in +++ b/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in @@ -98,6 +98,8 @@ cdef int cuPythonInit() except -1 nogil: if not handle: raise RuntimeError('Failed to LoadLibraryEx nvrtc64_120_0.dll') {{else}} + with gil: + print("\nLOOOK dlfcn.dlopen('libnvrtc.so.12', dlfcn.RTLD_NOW)", flush=True) handle = dlfcn.dlopen('libnvrtc.so.12', dlfcn.RTLD_NOW) if handle == NULL: with gil: From fdcd19527ce675d436042593a7369846c3f244ab Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Sat, 5 Apr 2025 23:48:41 -0700 Subject: [PATCH 59/69] Revert "Remove LD_LIBRARY_PATH in fetch_ctk/action.yml" This reverts commit 1b1139cda8b56f2fa37c5c0102ee7fe6b5963cab. --- .github/actions/fetch_ctk/action.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/actions/fetch_ctk/action.yml b/.github/actions/fetch_ctk/action.yml index 417da4926..669943296 100644 --- a/.github/actions/fetch_ctk/action.yml +++ b/.github/actions/fetch_ctk/action.yml @@ -123,3 +123,4 @@ runs: echo "CUDA_PATH=${CUDA_PATH}" >> $GITHUB_ENV echo "CUDA_HOME=${CUDA_PATH}" >> $GITHUB_ENV echo "${CUDA_PATH}/bin" >> $GITHUB_PATH + echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-}:${CUDA_PATH}/lib:${CUDA_PATH}/nvvm/lib64" >> $GITHUB_ENV From d12cbf5fabb6d70a28a4fb934917e36ed6143960 Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Sat, 5 Apr 2025 23:50:05 -0700 Subject: [PATCH 60/69] Only remove ${CUDA_PATH}/nvvm/lib64 from LD_LIBRARY_PATH --- .github/actions/fetch_ctk/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/fetch_ctk/action.yml b/.github/actions/fetch_ctk/action.yml index 669943296..5850b4c78 100644 --- a/.github/actions/fetch_ctk/action.yml +++ b/.github/actions/fetch_ctk/action.yml @@ -123,4 +123,4 @@ runs: echo "CUDA_PATH=${CUDA_PATH}" >> $GITHUB_ENV echo "CUDA_HOME=${CUDA_PATH}" >> $GITHUB_ENV echo "${CUDA_PATH}/bin" >> $GITHUB_PATH - echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-}:${CUDA_PATH}/lib:${CUDA_PATH}/nvvm/lib64" >> $GITHUB_ENV + echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-}:${CUDA_PATH}/lib" >> $GITHUB_ENV From 14c72ccd1aa799bab3e91ffba3e9fff373215413 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Sun, 6 Apr 2025 19:42:14 -0700 Subject: [PATCH 61/69] Use path_finder.load_nvidia_dynamic_library("nvrtc") from cuda/bindings/_bindings/cynvrtc.pyx.in --- .../cuda/bindings/_bindings/cynvrtc.pyx.in | 68 ++----------------- .../load_nvidia_dynamic_library.py | 2 + 2 files changed, 8 insertions(+), 62 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in b/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in index ee25dac87..2b0f3dc23 100644 --- a/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in +++ b/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in @@ -9,13 +9,12 @@ # This code was automatically generated with version 12.8.0. Do not modify it directly. 
{{if 'Windows' == platform.system()}} import os -import site -import struct import win32api -from pywintypes import error {{else}} cimport cuda.bindings._lib.dlfcn as dlfcn +from libc.stdint cimport uintptr_t {{endif}} +from cuda.bindings import path_finder cdef bint __cuPythonInit = False {{if 'nvrtcGetErrorString' in found_functions}}cdef void *__nvrtcGetErrorString = NULL{{endif}} @@ -47,7 +46,7 @@ cdef bint __cuPythonInit = False cdef int cuPythonInit() except -1 nogil: {{if 'Windows' != platform.system()}} - cdef char* err_msg + cdef void* handle = NULL {{endif}} global __cuPythonInit @@ -55,66 +54,9 @@ cdef int cuPythonInit() except -1 nogil: return 0 __cuPythonInit = True - # Load library - {{if 'Windows' == platform.system()}} - with gil: - # First check if the DLL has been loaded by 3rd parties - try: - handle = win32api.GetModuleHandle("nvrtc64_120_0.dll") - except: - handle = None - - # Else try default search - if not handle: - LOAD_LIBRARY_SAFE_CURRENT_DIRS = 0x00002000 - try: - handle = win32api.LoadLibraryEx("nvrtc64_120_0.dll", 0, LOAD_LIBRARY_SAFE_CURRENT_DIRS) - except: - pass - - # Final check if DLLs can be found within pip installations - if not handle: - site_packages = [site.getusersitepackages()] + site.getsitepackages() - for sp in site_packages: - mod_path = os.path.join(sp, "nvidia", "cuda_nvrtc", "bin") - if not os.path.isdir(mod_path): - continue - os.add_dll_directory(mod_path) - LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000 - LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100 - try: - handle = win32api.LoadLibraryEx( - # Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path... - os.path.join(mod_path, "nvrtc64_120_0.dll"), - 0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR) - - # Note: nvrtc64_120_0.dll calls into nvrtc-builtins64_*.dll which is - # located in the same mod_path. 
- # Update PATH environ so that the two dlls can find each other - os.environ["PATH"] = os.pathsep.join((os.environ.get("PATH", ""), mod_path)) - except: - pass - - if not handle: - raise RuntimeError('Failed to LoadLibraryEx nvrtc64_120_0.dll') - {{else}} - with gil: - print("\nLOOOK dlfcn.dlopen('libnvrtc.so.12', dlfcn.RTLD_NOW)", flush=True) - handle = dlfcn.dlopen('libnvrtc.so.12', dlfcn.RTLD_NOW) - if handle == NULL: - with gil: - err_msg = dlfcn.dlerror() - if err_msg == NULL: - err_msg_str = 'Unknown error' - else: - err_msg_str = err_msg.decode('utf-8', errors='backslashreplace') - raise RuntimeError(f'Failed to dlopen libnvrtc.so.12: {err_msg_str}') - {{endif}} - - - # Load function {{if 'Windows' == platform.system()}} with gil: + handle = path_finder.load_nvidia_dynamic_library("nvrtc") {{if 'nvrtcGetErrorString' in found_functions}} try: global __nvrtcGetErrorString @@ -299,6 +241,8 @@ cdef int cuPythonInit() except -1 nogil: {{endif}} {{else}} + with gil: + handle = path_finder.load_nvidia_dynamic_library("nvrtc") {{if 'nvrtcGetErrorString' in found_functions}} global __nvrtcGetErrorString __nvrtcGetErrorString = dlfcn.dlsym(handle, 'nvrtcGetErrorString') diff --git a/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py index 004edae47..a258a0f10 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py @@ -45,6 +45,8 @@ def _windows_load_with_dll_basename(name: str) -> int: if name == "nvJitLink": dll_name = "nvJitLink_120_0.dll" + elif name == "nvrtc": + dll_name = "nvrtc64_120_0.dll" elif name == "nvvm": dll_name = "nvvm64_40_0.dll" From 43abec8666a920e56ddc90cdb880ead248d0e45b Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Sun, 6 Apr 2025 22:01:36 -0700 Subject: [PATCH 62/69] Somewhat ad hoc heuristics for nvidia_cuda_nvrtc wheels. 
--- .../cuda/bindings/_path_finder/find_nvidia_dynamic_library.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py index 30a9b68f4..a543a0197 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py @@ -32,6 +32,8 @@ def _find_so_using_nvidia_lib_dirs(libname, so_basename, error_messages, attachm # Look for a versioned library # Using sort here mainly to make the result deterministic. for node in sorted(glob.glob(os.path.join(lib_dir, file_wild))): + if ".alt.so." in node or "-builtins" in node: + continue so_name = os.path.join(lib_dir, node) if os.path.isfile(so_name): return so_name @@ -47,6 +49,8 @@ def _find_dll_using_nvidia_bin_dirs(libname, error_messages, attachments): file_wild = libname + "*.dll" for bin_dir in sys_path_find_sub_dirs(nvidia_sub_dirs): for node in sorted(glob.glob(os.path.join(bin_dir, file_wild))): + if node.endswith(".alt.dll") or "-builtins" in node: + continue dll_name = os.path.join(bin_dir, node) if os.path.isfile(dll_name): return dll_name From bff8cf023c82c7456af79ef004ba1c30d16b974a Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Sun, 6 Apr 2025 22:12:50 -0700 Subject: [PATCH 63/69] Remove LD_LIBRARY_PATH entirely from .github/actions/fetch_ctk/action.yml --- .github/actions/fetch_ctk/action.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/actions/fetch_ctk/action.yml b/.github/actions/fetch_ctk/action.yml index 5850b4c78..417da4926 100644 --- a/.github/actions/fetch_ctk/action.yml +++ b/.github/actions/fetch_ctk/action.yml @@ -123,4 +123,3 @@ runs: echo "CUDA_PATH=${CUDA_PATH}" >> $GITHUB_ENV echo "CUDA_HOME=${CUDA_PATH}" >> $GITHUB_ENV echo "${CUDA_PATH}/bin" >> $GITHUB_PATH - echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-}:${CUDA_PATH}/lib" >> $GITHUB_ENV From dcc802a3108a87cd0b3f0ced1e4fa163b5950ebe Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Sun, 6 Apr 2025 22:13:50 -0700 Subject: [PATCH 64/69] Remove CUDA_PATH\nvvm\bin in .github/workflows/test-wheel-windows.yml --- .github/workflows/test-wheel-windows.yml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 233f56e4f..5bfa9bdf0 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -164,13 +164,6 @@ jobs: method: 'network' sub-packages: ${{ env.MINI_CTK_DEPS }} - - name: Update PATH - if: ${{ inputs.local-ctk == '1' }} - run: | - # mimics actual CTK installation - echo $PATH - echo "$env:CUDA_PATH\nvvm\bin" >> $env:GITHUB_PATH - - name: Run cuda.bindings tests if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }} run: | From 74d3cfbffda90a7742803efe1841e80acf5ea8ad Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 7 Apr 2025 22:14:10 -0700 Subject: [PATCH 65/69] Revert "Remove LD_LIBRARY_PATH entirely from .github/actions/fetch_ctk/action.yml" This reverts commit bff8cf023c82c7456af79ef004ba1c30d16b974a. 
--- .github/actions/fetch_ctk/action.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/actions/fetch_ctk/action.yml b/.github/actions/fetch_ctk/action.yml index 417da4926..5850b4c78 100644 --- a/.github/actions/fetch_ctk/action.yml +++ b/.github/actions/fetch_ctk/action.yml @@ -123,3 +123,4 @@ runs: echo "CUDA_PATH=${CUDA_PATH}" >> $GITHUB_ENV echo "CUDA_HOME=${CUDA_PATH}" >> $GITHUB_ENV echo "${CUDA_PATH}/bin" >> $GITHUB_PATH + echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-}:${CUDA_PATH}/lib" >> $GITHUB_ENV From 5d6339d66611a77e59ea1bfd51e59d76700fc625 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 7 Apr 2025 22:14:28 -0700 Subject: [PATCH 66/69] Revert "Somewhat ad hoc heuristics for nvidia_cuda_nvrtc wheels." This reverts commit 43abec8666a920e56ddc90cdb880ead248d0e45b. --- .../cuda/bindings/_path_finder/find_nvidia_dynamic_library.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py index a543a0197..30a9b68f4 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py @@ -32,8 +32,6 @@ def _find_so_using_nvidia_lib_dirs(libname, so_basename, error_messages, attachm # Look for a versioned library # Using sort here mainly to make the result deterministic. for node in sorted(glob.glob(os.path.join(lib_dir, file_wild))): - if ".alt.so." 
in node or "-builtins" in node: - continue so_name = os.path.join(lib_dir, node) if os.path.isfile(so_name): return so_name @@ -49,8 +47,6 @@ def _find_dll_using_nvidia_bin_dirs(libname, error_messages, attachments): file_wild = libname + "*.dll" for bin_dir in sys_path_find_sub_dirs(nvidia_sub_dirs): for node in sorted(glob.glob(os.path.join(bin_dir, file_wild))): - if node.endswith(".alt.dll") or "-builtins" in node: - continue dll_name = os.path.join(bin_dir, node) if os.path.isfile(dll_name): return dll_name From ba093f5700a99153b5c26b224a21aaceb69ae72b Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 7 Apr 2025 22:16:32 -0700 Subject: [PATCH 67/69] Restore cuda/bindings/_bindings/cynvrtc.pyx.in as-is on main --- .../cuda/bindings/_bindings/cynvrtc.pyx.in | 63 ++++++++++++++++--- 1 file changed, 54 insertions(+), 9 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in b/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in index 2b0f3dc23..a0f8a27a0 100644 --- a/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in +++ b/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in @@ -9,12 +9,13 @@ # This code was automatically generated with version 12.8.0. Do not modify it directly. 
{{if 'Windows' == platform.system()}} import os +import site +import struct import win32api +from pywintypes import error {{else}} cimport cuda.bindings._lib.dlfcn as dlfcn -from libc.stdint cimport uintptr_t {{endif}} -from cuda.bindings import path_finder cdef bint __cuPythonInit = False {{if 'nvrtcGetErrorString' in found_functions}}cdef void *__nvrtcGetErrorString = NULL{{endif}} @@ -45,18 +46,64 @@ cdef bint __cuPythonInit = False {{if 'nvrtcSetFlowCallback' in found_functions}}cdef void *__nvrtcSetFlowCallback = NULL{{endif}} cdef int cuPythonInit() except -1 nogil: - {{if 'Windows' != platform.system()}} - cdef void* handle = NULL - {{endif}} - global __cuPythonInit if __cuPythonInit: return 0 __cuPythonInit = True + # Load library + {{if 'Windows' == platform.system()}} + with gil: + # First check if the DLL has been loaded by 3rd parties + try: + handle = win32api.GetModuleHandle("nvrtc64_120_0.dll") + except: + handle = None + + # Else try default search + if not handle: + LOAD_LIBRARY_SAFE_CURRENT_DIRS = 0x00002000 + try: + handle = win32api.LoadLibraryEx("nvrtc64_120_0.dll", 0, LOAD_LIBRARY_SAFE_CURRENT_DIRS) + except: + pass + + # Final check if DLLs can be found within pip installations + if not handle: + site_packages = [site.getusersitepackages()] + site.getsitepackages() + for sp in site_packages: + mod_path = os.path.join(sp, "nvidia", "cuda_nvrtc", "bin") + if not os.path.isdir(mod_path): + continue + os.add_dll_directory(mod_path) + LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000 + LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100 + try: + handle = win32api.LoadLibraryEx( + # Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path... + os.path.join(mod_path, "nvrtc64_120_0.dll"), + 0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR) + + # Note: nvrtc64_120_0.dll calls into nvrtc-builtins64_*.dll which is + # located in the same mod_path. 
+ # Update PATH environ so that the two dlls can find each other + os.environ["PATH"] = os.pathsep.join((os.environ.get("PATH", ""), mod_path)) + except: + pass + + if not handle: + raise RuntimeError('Failed to LoadLibraryEx nvrtc64_120_0.dll') + {{else}} + handle = dlfcn.dlopen('libnvrtc.so.12', dlfcn.RTLD_NOW) + if handle == NULL: + with gil: + raise RuntimeError('Failed to dlopen libnvrtc.so.12') + {{endif}} + + + # Load function {{if 'Windows' == platform.system()}} with gil: - handle = path_finder.load_nvidia_dynamic_library("nvrtc") {{if 'nvrtcGetErrorString' in found_functions}} try: global __nvrtcGetErrorString @@ -241,8 +288,6 @@ cdef int cuPythonInit() except -1 nogil: {{endif}} {{else}} - with gil: - handle = path_finder.load_nvidia_dynamic_library("nvrtc") {{if 'nvrtcGetErrorString' in found_functions}} global __nvrtcGetErrorString __nvrtcGetErrorString = dlfcn.dlsym(handle, 'nvrtcGetErrorString') From 332e15c2d775791f9a6cbefe81bf7633d98e1d82 Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Mon, 7 Apr 2025 22:18:06 -0700 Subject: [PATCH 68/69] Remove debug print from load_nvidia_dynamic_library.py --- .../bindings/_path_finder/load_nvidia_dynamic_library.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py index a258a0f10..692e8e0bc 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py @@ -60,12 +60,10 @@ def _windows_load_with_dll_basename(name: str) -> int: @functools.cache def load_nvidia_dynamic_library(name: str) -> int: - print(f"\nLOOOK load_nvidia_dynamic_library({name=}) ENTRY", flush=True) # First try using the platform-specific dynamic loader search mechanisms if sys.platform == "win32": handle = _windows_load_with_dll_basename(name) if handle: - print(f"\nLOOOK load_nvidia_dynamic_library({name=}): _windows_load_with_dll_basename", flush=True) return handle else: dl_path = f"lib{name}.so" # Version intentionally no specified. 
@@ -75,7 +73,6 @@ def load_nvidia_dynamic_library(name: str) -> int: pass else: # Use `cdef void* ptr = ` in cython to convert back to void* - print(f"\nLOOOK load_nvidia_dynamic_library({name=}): ctypes.CDLL({dl_path=})", flush=True) return handle._handle # C unsigned int dl_path = find_nvidia_dynamic_library(name) @@ -85,7 +82,6 @@ def load_nvidia_dynamic_library(name: str) -> int: except pywintypes.error as e: raise RuntimeError(f"Failed to load DLL at {dl_path}: {e}") from e # Use `cdef void* ptr = ` in cython to convert back to void* - print(f"\nLOOOK load_nvidia_dynamic_library({name=}): win32api.LoadLibrary({dl_path=})", flush=True) return handle # C signed int, matches win32api.GetProcAddress else: try: @@ -93,5 +89,4 @@ def load_nvidia_dynamic_library(name: str) -> int: except OSError as e: raise RuntimeError(f"Failed to dlopen {dl_path}: {e}") from e # Use `cdef void* ptr = ` in cython to convert back to void* - print(f"\nLOOOK load_nvidia_dynamic_library({name=}): ctypes.CDLL({dl_path=})", flush=True) return handle._handle # C unsigned int From 8f69f832af51c393601b09c2fe29d874e9abb057 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 7 Apr 2025 22:19:39 -0700 Subject: [PATCH 69/69] Reapply "Revert debug changes under .github/workflows" This reverts commit aaa6aff637f6bd076d0b124a39d56eeab5875351. --- .github/workflows/build-and-test.yml | 10 ++++++++++ .github/workflows/test-wheel-linux.yml | 8 ++++---- .github/workflows/test-wheel-windows.yml | 4 ++-- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index b33bbcf65..288a5624b 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -21,9 +21,14 @@ jobs: matrix: host-platform: - linux-64 + - linux-aarch64 - win-64 python-version: + - "3.13" - "3.12" + - "3.11" + - "3.10" + - "3.9" cuda-version: # Note: this is for build-time only. 
- "12.8.0" @@ -206,8 +211,13 @@ jobs: matrix: host-platform: - linux-64 + - linux-aarch64 python-version: + - "3.13" - "3.12" + - "3.11" + - "3.10" + - "3.9" cuda-version: # Note: this is for test-time only. - "12.8.0" diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index 19c78c8cc..322f859e3 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -194,7 +194,7 @@ jobs: pushd ./cuda_bindings pip install -r requirements.txt - pytest -ra -s -v tests/ + pytest -rxXs -v tests/ # It is a bit convoluted to run the Cython tests against CTK wheels, # so let's just skip them. @@ -205,7 +205,7 @@ jobs: # TODO: enable this once win-64 runners are up exit 1 fi - pytest -ra -s -v tests/cython + pytest -rxXs -v tests/cython fi popd @@ -229,7 +229,7 @@ jobs: pushd ./cuda_core pip install -r "tests/requirements-cu${TEST_CUDA_MAJOR}.txt" - pytest -ra -s -v tests/ + pytest -rxXs -v tests/ # It is a bit convoluted to run the Cython tests against CTK wheels, # so let's just skip them. Also, currently our CI always installs the @@ -243,7 +243,7 @@ jobs: # TODO: enable this once win-64 runners are up exit 1 fi - pytest -ra -s -v tests/cython + pytest -rxXs -v tests/cython fi popd diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 5bfa9bdf0..948d2fae6 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -179,7 +179,7 @@ jobs: Push-Location ./cuda_bindings pip install -r requirements.txt - pytest -ra -s -v tests/ + pytest -rxXs -v tests/ # skip Cython tests for now (NVIDIA/cuda-python#466) Pop-Location @@ -203,7 +203,7 @@ jobs: Push-Location ./cuda_core pip install -r "tests/requirements-cu${TEST_CUDA_MAJOR}.txt" - pytest -ra -s -v tests/ + pytest -rxXs -v tests/ Pop-Location - name: Ensure cuda-python installable