diff --git a/.github/actions/fetch_ctk/action.yml b/.github/actions/fetch_ctk/action.yml index 669943296..5850b4c78 100644 --- a/.github/actions/fetch_ctk/action.yml +++ b/.github/actions/fetch_ctk/action.yml @@ -123,4 +123,4 @@ runs: echo "CUDA_PATH=${CUDA_PATH}" >> $GITHUB_ENV echo "CUDA_HOME=${CUDA_PATH}" >> $GITHUB_ENV echo "${CUDA_PATH}/bin" >> $GITHUB_PATH - echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-}:${CUDA_PATH}/lib:${CUDA_PATH}/nvvm/lib64" >> $GITHUB_ENV + echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-}:${CUDA_PATH}/lib" >> $GITHUB_ENV diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 4e48590a3..948d2fae6 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -164,13 +164,6 @@ jobs: method: 'network' sub-packages: ${{ env.MINI_CTK_DEPS }} - - name: Update PATH - if: ${{ inputs.local-ctk == '1' }} - run: | - # mimics actual CTK installation - echo $PATH - echo "$env:CUDA_PATH\nvvm\bin" >> $env:GITHUB_PATH - - name: Run cuda.bindings tests if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }} run: | diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx index 9961a2105..9d21a3e10 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx @@ -4,12 +4,12 @@ # # This code was automatically generated across versions from 12.0.1 to 12.8.0. Do not modify it directly. 
-from libc.stdint cimport intptr_t - -from .utils cimport get_nvjitlink_dso_version_suffix +from libc.stdint cimport intptr_t, uintptr_t from .utils import FunctionNotFoundError, NotSupportedError +from cuda.bindings import path_finder + ############################################################################### # Extern ############################################################################### @@ -52,17 +52,9 @@ cdef void* __nvJitLinkGetInfoLog = NULL cdef void* __nvJitLinkVersion = NULL -cdef void* load_library(const int driver_ver) except* with gil: - cdef void* handle - for suffix in get_nvjitlink_dso_version_suffix(driver_ver): - so_name = "libnvJitLink.so" + (f".{suffix}" if suffix else suffix) - handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL) - if handle != NULL: - break - else: - err_msg = dlerror() - raise RuntimeError(f'Failed to dlopen libnvJitLink ({err_msg.decode()})') - return handle +cdef void* load_library(int driver_ver) except* with gil: + cdef uintptr_t handle = path_finder.load_nvidia_dynamic_library("nvJitLink") + return handle cdef int _check_or_init_nvjitlink() except -1 nogil: diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx index c8c7e6b29..f86972216 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx @@ -6,12 +6,9 @@ from libc.stdint cimport intptr_t -from .utils cimport get_nvjitlink_dso_version_suffix - from .utils import FunctionNotFoundError, NotSupportedError -import os -import site +from cuda.bindings import path_finder import win32api @@ -42,54 +39,9 @@ cdef void* __nvJitLinkGetInfoLog = NULL cdef void* __nvJitLinkVersion = NULL -cdef inline list get_site_packages(): - return [site.getusersitepackages()] + site.getsitepackages() - - -cdef load_library(const int driver_ver): - handle = 0 - - for suffix in 
get_nvjitlink_dso_version_suffix(driver_ver): - if len(suffix) == 0: - continue - dll_name = f"nvJitLink_{suffix}0_0.dll" - - # First check if the DLL has been loaded by 3rd parties - try: - handle = win32api.GetModuleHandle(dll_name) - except: - pass - else: - break - - # Next, check if DLLs are installed via pip - for sp in get_site_packages(): - mod_path = os.path.join(sp, "nvidia", "nvJitLink", "bin") - if not os.path.isdir(mod_path): - continue - os.add_dll_directory(mod_path) - try: - handle = win32api.LoadLibraryEx( - # Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path... - os.path.join(mod_path, dll_name), - 0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR) - except: - pass - else: - break - - # Finally, try default search - try: - handle = win32api.LoadLibrary(dll_name) - except: - pass - else: - break - else: - raise RuntimeError('Failed to load nvJitLink') - - assert handle != 0 - return handle +cdef void* load_library(int driver_ver) except* with gil: + cdef intptr_t handle = path_finder.load_nvidia_dynamic_library("nvJitLink") + return handle cdef int _check_or_init_nvjitlink() except -1 nogil: @@ -98,15 +50,16 @@ cdef int _check_or_init_nvjitlink() except -1 nogil: return 0 cdef int err, driver_ver + cdef intptr_t handle with gil: # Load driver to check version try: - handle = win32api.LoadLibraryEx("nvcuda.dll", 0, LOAD_LIBRARY_SEARCH_SYSTEM32) + nvcuda_handle = win32api.LoadLibraryEx("nvcuda.dll", 0, LOAD_LIBRARY_SEARCH_SYSTEM32) except Exception as e: raise NotSupportedError(f'CUDA driver is not found ({e})') global __cuDriverGetVersion if __cuDriverGetVersion == NULL: - __cuDriverGetVersion = win32api.GetProcAddress(handle, 'cuDriverGetVersion') + __cuDriverGetVersion = win32api.GetProcAddress(nvcuda_handle, 'cuDriverGetVersion') if __cuDriverGetVersion == NULL: raise RuntimeError('something went wrong') err = (__cuDriverGetVersion)(&driver_ver) @@ -114,7 +67,7 @@ cdef int _check_or_init_nvjitlink() except -1 nogil: 
raise RuntimeError('something went wrong') # Load library - handle = load_library(driver_ver) + handle = load_library(driver_ver) # Load function global __nvJitLinkCreate diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx index 64e78e75a..33ba8e610 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx @@ -4,12 +4,12 @@ # # This code was automatically generated across versions from 11.0.3 to 12.8.0. Do not modify it directly. -from libc.stdint cimport intptr_t - -from .utils cimport get_nvvm_dso_version_suffix +from libc.stdint cimport intptr_t, uintptr_t from .utils import FunctionNotFoundError, NotSupportedError +from cuda.bindings import path_finder + ############################################################################### # Extern ############################################################################### @@ -51,16 +51,8 @@ cdef void* __nvvmGetProgramLog = NULL cdef void* load_library(const int driver_ver) except* with gil: - cdef void* handle - for suffix in get_nvvm_dso_version_suffix(driver_ver): - so_name = "libnvvm.so" + (f".{suffix}" if suffix else suffix) - handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL) - if handle != NULL: - break - else: - err_msg = dlerror() - raise RuntimeError(f'Failed to dlopen libnvvm ({err_msg.decode()})') - return handle + cdef uintptr_t handle = path_finder.load_nvidia_dynamic_library("nvvm") + return handle cdef int _check_or_init_nvvm() except -1 nogil: diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx index 76ce23254..6349fa5a1 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx @@ -6,12 +6,9 @@ from libc.stdint cimport intptr_t -from .utils cimport get_nvvm_dso_version_suffix - from .utils import 
FunctionNotFoundError, NotSupportedError -import os -import site +from cuda.bindings import path_finder import win32api @@ -40,54 +37,9 @@ cdef void* __nvvmGetProgramLogSize = NULL cdef void* __nvvmGetProgramLog = NULL -cdef inline list get_site_packages(): - return [site.getusersitepackages()] + site.getsitepackages() - - -cdef load_library(const int driver_ver): - handle = 0 - - for suffix in get_nvvm_dso_version_suffix(driver_ver): - if len(suffix) == 0: - continue - dll_name = "nvvm64_40_0" - - # First check if the DLL has been loaded by 3rd parties - try: - handle = win32api.GetModuleHandle(dll_name) - except: - pass - else: - break - - # Next, check if DLLs are installed via pip - for sp in get_site_packages(): - mod_path = os.path.join(sp, "nvidia", "cuda_nvcc", "nvvm", "bin") - if not os.path.isdir(mod_path): - continue - os.add_dll_directory(mod_path) - try: - handle = win32api.LoadLibraryEx( - # Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path... - os.path.join(mod_path, dll_name), - 0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR) - except: - pass - else: - break - - # Finally, try default search - try: - handle = win32api.LoadLibrary(dll_name) - except: - pass - else: - break - else: - raise RuntimeError('Failed to load nvvm') - - assert handle != 0 - return handle +cdef void* load_library(int driver_ver) except* with gil: + cdef intptr_t handle = path_finder.load_nvidia_dynamic_library("nvvm") + return handle cdef int _check_or_init_nvvm() except -1 nogil: @@ -96,15 +48,16 @@ cdef int _check_or_init_nvvm() except -1 nogil: return 0 cdef int err, driver_ver + cdef intptr_t handle with gil: # Load driver to check version try: - handle = win32api.LoadLibraryEx("nvcuda.dll", 0, LOAD_LIBRARY_SEARCH_SYSTEM32) + nvcuda_handle = win32api.LoadLibraryEx("nvcuda.dll", 0, LOAD_LIBRARY_SEARCH_SYSTEM32) except Exception as e: raise NotSupportedError(f'CUDA driver is not found ({e})') global __cuDriverGetVersion if __cuDriverGetVersion 
== NULL: - __cuDriverGetVersion = win32api.GetProcAddress(handle, 'cuDriverGetVersion') + __cuDriverGetVersion = win32api.GetProcAddress(nvcuda_handle, 'cuDriverGetVersion') if __cuDriverGetVersion == NULL: raise RuntimeError('something went wrong') err = (__cuDriverGetVersion)(&driver_ver) @@ -112,7 +65,7 @@ cdef int _check_or_init_nvvm() except -1 nogil: raise RuntimeError('something went wrong') # Load library - handle = load_library(driver_ver) + handle = load_library(driver_ver) # Load function global __nvvmVersion diff --git a/cuda_bindings/cuda/bindings/_internal/utils.pxd b/cuda_bindings/cuda/bindings/_internal/utils.pxd index cac7846ff..a4b71c531 100644 --- a/cuda_bindings/cuda/bindings/_internal/utils.pxd +++ b/cuda_bindings/cuda/bindings/_internal/utils.pxd @@ -165,6 +165,3 @@ cdef int get_nested_resource_ptr(nested_resource[ResT] &in_out_ptr, object obj, cdef bint is_nested_sequence(data) cdef void* get_buffer_pointer(buf, Py_ssize_t size, readonly=*) except* - -cdef tuple get_nvjitlink_dso_version_suffix(int driver_ver) -cdef tuple get_nvvm_dso_version_suffix(int driver_ver) diff --git a/cuda_bindings/cuda/bindings/_internal/utils.pyx b/cuda_bindings/cuda/bindings/_internal/utils.pyx index 0a693c052..7fc77b22c 100644 --- a/cuda_bindings/cuda/bindings/_internal/utils.pyx +++ b/cuda_bindings/cuda/bindings/_internal/utils.pyx @@ -127,17 +127,3 @@ cdef int get_nested_resource_ptr(nested_resource[ResT] &in_out_ptr, object obj, class FunctionNotFoundError(RuntimeError): pass class NotSupportedError(RuntimeError): pass - - -cdef tuple get_nvjitlink_dso_version_suffix(int driver_ver): - if 12000 <= driver_ver < 13000: - return ('12', '') - raise NotSupportedError(f'CUDA driver version {driver_ver} is not supported') - - -cdef tuple get_nvvm_dso_version_suffix(int driver_ver): - if 11000 <= driver_ver < 11020: - return ('3', '') - if 11020 <= driver_ver < 13000: - return ('4', '') - raise NotSupportedError(f'CUDA driver version {driver_ver} is not supported') 
def _get_numba_CUDA_INCLUDE_PATH():
    """Resolve the CUDA include directory the way numba's config module does.

    The ``NUMBA_CUDA_INCLUDE_PATH`` environment variable wins when it is set.
    Otherwise the default is ``<CUDA_PATH>/include`` on Windows (or the
    placeholder string ``"cuda_include_not_found"`` when ``CUDA_PATH`` is unset
    or empty) and ``/usr/local/cuda/include`` on every other platform.

    Returns:
        The include path as a string. Its existence is not checked here.
    """
    # From numba/numba/core/config.py

    def _resolve_default(default):
        # A default may be given either as a value or as a zero-arg factory.
        return default() if callable(default) else default

    def _readenv(name, ctor, default):
        value = os.environ.get(name)
        if value is None:
            return _resolve_default(default)
        try:
            return ctor(value)
        except Exception:
            warnings.warn(  # noqa: B028
                f"Environment variable '{name}' is defined but "
                f"its associated value '{value}' could not be "
                "parsed.\nThe parse failed with exception:\n"
                f"{traceback.format_exc()}",
                RuntimeWarning,
            )
            # Fall back exactly like the "variable not set" path: previously a
            # callable default was returned uncalled from this branch.
            return _resolve_default(default)

    if IS_WIN32:
        cuda_path = os.environ.get("CUDA_PATH")
        if cuda_path:  # noqa: SIM108
            default_cuda_include_path = os.path.join(cuda_path, "include")
        else:
            default_cuda_include_path = "cuda_include_not_found"
    else:
        default_cuda_include_path = os.path.join(os.sep, "usr", "local", "cuda", "include")
    CUDA_INCLUDE_PATH = _readenv("NUMBA_CUDA_INCLUDE_PATH", str, default_cuda_include_path)
    return CUDA_INCLUDE_PATH
def _get_nvvm_wheel():
    """Return the directory that holds the NVVM library of an NVCC wheel.

    The search visits, in order: the user site-packages directory, every
    global site-packages directory, the relative directory ``"conda"``, and
    finally the bare library file name (resolved against the current working
    directory). For every entry except the last, the library is expected under
    ``<entry>/nvidia/cuda_nvcc/nvvm/{lib64|bin}/``.

    Returns:
        The parent directory of the first existing library file as a string,
        or None when no candidate exists.

    Raises:
        AssertionError: if the platform is neither Linux nor Windows.
    """
    search_roots = [site.getusersitepackages(), *site.getsitepackages(), "conda", None]

    # The SONAME is taken based on public CTK 12.x releases
    if sys.platform.startswith("linux"):
        # Hack: libnvvm from Linux wheel
        # does not have any soname (CUDAINST-3183)
        lib_subdir, lib_file = "lib64", "libnvvm.so"
    elif sys.platform.startswith("win32"):
        lib_subdir, lib_file = "bin", "nvvm64_40_0.dll"
    else:
        raise AssertionError()

    for root in search_roots:
        if root is None:
            candidate = lib_file
        else:
            candidate = os.path.join(root, "nvidia", "cuda_nvcc", "nvvm", lib_subdir, lib_file)
        if os.path.exists(candidate):
            return str(Path(candidate).parent)
    return None
def _get_static_cudalib_dir_path_decision():
    """Choose the directory expected to contain the static CUDA libraries.

    All candidate locations are computed up front; the first one whose path
    is not None is selected, in this priority order: conda environment,
    NVIDIA conda package, CUDA_HOME, system toolkit.

    Returns:
        A ``(by, libdir)`` pair as produced by ``_find_valid_path``.
    """
    conda_dir = get_conda_ctk()
    nvidia_conda_dir = get_nvidia_static_cudalib_ctk()
    cuda_home_dir = get_cuda_home(*_cuda_home_static_cudalib_path())
    system_dir = get_system_ctk(_cudalib_path())
    candidates = [
        ("Conda environment", conda_dir),
        ("Conda environment (NVIDIA package)", nvidia_conda_dir),
        ("CUDA_HOME", cuda_home_dir),
        ("System", system_dir),
    ]
    source, static_dir = _find_valid_path(candidates)
    return source, static_dir
def get_nvidia_nvvm_ctk():
    """Return path to directory containing the NVVM shared library.

    Only applies inside a conda environment (detected by a ``conda-meta``
    directory under ``sys.prefix``). Returns None when no conda environment
    is active, when neither known NVVM directory exists, or when no NVVM
    library file is found in the chosen directory.
    """
    if not os.path.exists(os.path.join(sys.prefix, "conda-meta")):
        return None

    # Assume the existence of NVVM in the conda env implies that a CUDA toolkit
    # conda package is installed.
    layouts = (
        # Location used on Linux and by the Windows 11.x packages
        ("nvvm",),
        # Location used by the Windows 12.x packages
        ("Library", "nvvm"),
    )
    for layout in layouts:
        libdir = os.path.join(sys.prefix, *layout, _cudalib_path())
        if os.path.isdir(libdir):
            break
    else:
        # Neither location exists: assume we don't have the NVIDIA conda package
        return None

    matches = find_lib("nvvm", libdir=libdir)
    if not matches:
        return None
    # Use the directory name of the max path
    return os.path.dirname(max(matches))
def _get_nvvm_path():
    """Locate the NVVM shared library file.

    Returns:
        An ``_env_path_tuple(by, info)`` where ``by`` names the source that
        supplied the location and ``info`` is the library file path, or None
        when no library file was found.
    """
    by, path = _get_nvvm_path_decision()
    if by == "NVIDIA NVCC Wheel":
        # The NVVM path is a directory, not a file. The file name has to match
        # what _get_nvvm_wheel() probed for: that function only accepts the
        # directory if it contains nvvm64_40_0.dll on Windows and libnvvm.so on
        # Linux, so unconditionally appending "libnvvm.so" produced a
        # non-existent path on Windows.
        nvvm_file = "nvvm64_40_0.dll" if IS_WIN32 else "libnvvm.so"
        path = os.path.join(path, nvvm_file)
    else:
        candidates = find_lib("nvvm", path)
        # Several versioned files may match; keep the lexicographically largest.
        path = max(candidates) if candidates else None
    return _env_path_tuple(by, path)
def get_current_cuda_target_name():
    """Determine conda's CTK target folder based on system and machine arch.

    CTK's conda package delivers headers based on its architecture type. For example,
    an `x86_64` machine places headers under `$CONDA_PREFIX/targets/x86_64-linux`, and
    `aarch64` places them under `$CONDA_PREFIX/targets/sbsa-linux`. Read more about the
    nuances at cudart's conda feedstock:
    https://github.com/conda-forge/cuda-cudart-feedstock/blob/main/recipe/meta.yaml#L8-L11 # noqa: E501

    Returns None when the system/machine combination has no known target name.
    """
    targets_by_system = {
        "Linux": {"x86_64": "x86_64-linux", "aarch64": "sbsa-linux"},
        "Windows": {"AMD64": "x64"},
    }
    system_name = platform.system()
    machine_name = platform.machine()
    return targets_by_system.get(system_name, {}).get(machine_name)
def _get_include_dir():
    """Find the root include directory.

    The conda include directory is preferred when it is available; otherwise
    the module-level ``config_CUDA_INCLUDE_PATH`` value is used.

    Returns:
        An ``_env_path_tuple(by, info)`` naming the selected source and path.
    """
    conda_include_dir = get_conda_include_dir()
    candidates = [
        ("Conda environment (NVIDIA package)", conda_include_dir),
        ("CUDA_INCLUDE_PATH Config Entry", config_CUDA_INCLUDE_PATH),
        # TODO: add others
    ]
    source, chosen_dir = _find_valid_path(candidates)
    return _env_path_tuple(source, chosen_dir)
def _no_such_file_in_sub_dirs(sub_dirs, file_wild, error_messages, attachments):
    """Record diagnostics for a file pattern that matched nothing.

    Appends one "No such file" entry to *error_messages* and, for every
    directory yielded by ``sys_path_find_sub_dirs(sub_dirs)``, a header line
    followed by the sorted directory listing to *attachments*. Both lists are
    modified in place; nothing is returned.
    """
    error_messages.append(f"No such file: {file_wild}")
    for searched_dir in sys_path_find_sub_dirs(sub_dirs):
        attachments.append(f' listdir("{searched_dir}"):')
        attachments.extend(f" {entry}" for entry in sorted(os.listdir(searched_dir)))
def _find_so_using_cudalib_dir(so_basename, error_messages, attachments):
    """Look for *so_basename* in the CUDA library directory and its lib/lib64 twin.

    Args:
        so_basename: File name of the shared object, e.g. ``"libnvJitLink.so"``.
        error_messages: List that receives one "No such file" entry per
            candidate path that does not exist. Modified in place.
        attachments: List that receives a listing of every candidate
            directory when nothing was found. Modified in place.

    Returns:
        The full path of the first existing candidate, or None.
    """
    cudalib_dir = _get_cuda_paths_info("cudalib_dir", error_messages)
    if cudalib_dir is None:
        return None
    primary_so_dir = cudalib_dir + "/"
    candidate_so_dirs = [primary_so_dir]
    # Also probe the sibling spelling of the directory: swap the last "/lib64/"
    # for "/lib/" and, on the second pass, the last "/lib/" for "/lib64/".
    libs = ["/lib/", "/lib64/"]
    for _ in range(2):
        alt_dir = libs[0].join(primary_so_dir.rsplit(libs[1], 1))
        if alt_dir not in candidate_so_dirs:
            candidate_so_dirs.append(alt_dir)
        libs.reverse()
    # NOTE: error_messages must not be rebound here. The caller builds its
    # exception text from this very list, so appending to a fresh local list
    # would silently drop every "No such file" diagnostic.
    for so_dirname in candidate_so_dirs:
        so_name = so_dirname + so_basename
        if os.path.isfile(so_name):
            return so_name
        error_messages.append(f"No such file: {so_name}")
    for so_dirname in candidate_so_dirs:
        attachments.append(f' listdir("{so_dirname}"):')
        if not os.path.isdir(so_dirname):
            attachments.append(" DIRECTORY DOES NOT EXIST")
        else:
            for node in sorted(os.listdir(so_dirname)):
                attachments.append(f" {node}")
    return None
def get_lib_dirs():
    """Return the default library directories under ``sys.prefix``.

    Anaconda specific.
    """
    if sys.platform == "win32":
        # on windows, historically `DLLs` has been used for CUDA libraries,
        # since approximately CUDA 9.2, `Library\bin` has been used.
        relative_dirs = ("DLLs", os.path.join("Library", "bin"))
    else:
        relative_dirs = ("lib",)
    return [os.path.join(sys.prefix, relative_dir) for relative_dir in relative_dirs]
def find_file(pat, libdir=None):
    """Return the files in *libdir* whose names match the compiled regex *pat*.

    Args:
        pat: Compiled regular expression; applied with ``pat.match`` to each
            directory entry name (not to the full path).
        libdir: None to search the directories from ``get_lib_dirs()``, a
            single directory (``str`` or ``os.PathLike``), or an iterable of
            directories.

    Returns:
        A list of full paths to matching regular files, in directory order.
        Search entries that do not exist or are not directories are skipped.
    """
    if libdir is None:
        libdirs = get_lib_dirs()
    elif isinstance(libdir, (str, os.PathLike)):
        # A single directory; a PathLike must not be iterated like a sequence.
        libdirs = [libdir]
    else:
        libdirs = list(libdir)
    files = []
    for ldir in libdirs:
        try:
            entries = os.listdir(ldir)
        except (FileNotFoundError, NotADirectoryError):
            # An unusable search-path entry must not abort the whole search.
            continue
        candidates = [os.path.join(ldir, ent) for ent in entries if pat.match(ent)]
        files.extend(c for c in candidates if os.path.isfile(c))
    return files
driver_ver.value + + +@functools.cache +def _windows_load_with_dll_basename(name: str) -> int: + driver_ver = _windows_cuDriverGetVersion() + del driver_ver # Keeping this here because it will probably be needed in the future. + + if name == "nvJitLink": + dll_name = "nvJitLink_120_0.dll" + elif name == "nvrtc": + dll_name = "nvrtc64_120_0.dll" + elif name == "nvvm": + dll_name = "nvvm64_40_0.dll" + + try: + return win32api.LoadLibrary(dll_name) + except pywintypes.error: + pass + + return None + + +@functools.cache +def load_nvidia_dynamic_library(name: str) -> int: + # First try using the platform-specific dynamic loader search mechanisms + if sys.platform == "win32": + handle = _windows_load_with_dll_basename(name) + if handle: + return handle + else: + dl_path = f"lib{name}.so" # Version intentionally no specified. + try: + handle = ctypes.CDLL(dl_path, _LINUX_CDLL_MODE) + except OSError: + pass + else: + # Use `cdef void* ptr = ` in cython to convert back to void* + return handle._handle # C unsigned int + + dl_path = find_nvidia_dynamic_library(name) + if sys.platform == "win32": + try: + handle = win32api.LoadLibrary(dl_path) + except pywintypes.error as e: + raise RuntimeError(f"Failed to load DLL at {dl_path}: {e}") from e + # Use `cdef void* ptr = ` in cython to convert back to void* + return handle # C signed int, matches win32api.GetProcAddress + else: + try: + handle = ctypes.CDLL(dl_path, _LINUX_CDLL_MODE) + except OSError as e: + raise RuntimeError(f"Failed to dlopen {dl_path}: {e}") from e + # Use `cdef void* ptr = ` in cython to convert back to void* + return handle._handle # C unsigned int diff --git a/cuda_bindings/cuda/bindings/_path_finder/sys_path_find_sub_dirs.py b/cuda_bindings/cuda/bindings/_path_finder/sys_path_find_sub_dirs.py new file mode 100644 index 000000000..d2da726c9 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_path_finder/sys_path_find_sub_dirs.py @@ -0,0 +1,40 @@ +# Copyright 2024-2025 NVIDIA Corporation. All rights reserved. 
#
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

import functools
import os
import sys


@functools.cache
def _impl(sys_path, sub_dirs):
    """Find directories ``<base>/<sub_dirs...>`` for each base in ``sys_path``.

    Parameters
    ----------
    sys_path:
        Tuple of base directory paths to search, in priority order.
    sub_dirs:
        Tuple of path components to descend through.  The component ``"*"``
        matches every sub-directory at that level.

    Returns
    -------
    list of str
        Existing directories that match, grouped by base in ``sys_path``
        order.  Within one base, wildcard matches come out in descending
        name order because candidates are pushed sorted and popped LIFO.

    Both arguments must be hashable because the function is memoized with
    ``functools.cache``; the same list object is returned on every cache hit,
    and directories created after the first call with given arguments are not
    picked up.
    """
    results = []
    for base in sys_path:
        stack = [(base, 0)]  # (current_path, index into sub_dirs)
        while stack:
            current_path, idx = stack.pop()
            if idx == len(sub_dirs):
                # All components consumed: keep the path if it is a directory.
                if os.path.isdir(current_path):
                    results.append(current_path)
                continue

            sub = sub_dirs[idx]
            if sub == "*":
                try:
                    entries = sorted(os.listdir(current_path))
                except OSError:
                    # Unreadable or missing directory: nothing to expand.
                    continue
                for entry in entries:
                    entry_path = os.path.join(current_path, entry)
                    if os.path.isdir(entry_path):
                        stack.append((entry_path, idx + 1))
            else:
                next_path = os.path.join(current_path, sub)
                if os.path.isdir(next_path):
                    stack.append((next_path, idx + 1))
    return results


def sys_path_find_sub_dirs(sub_dirs):
    """Return directories matching ``sub_dirs`` under the current ``sys.path``.

    ``sub_dirs`` is any iterable of path components (``"*"`` is a wildcard,
    see ``_impl``).  A fresh list is returned on every call so that callers
    may modify it without corrupting the memoized result held by ``_impl``.
    """
    return list(_impl(tuple(sys.path), tuple(sub_dirs)))


# diff --git a/cuda_bindings/cuda/bindings/path_finder.py b/cuda_bindings/cuda/bindings/path_finder.py
# new file mode 100644
# index 000000000..21aeb4b36
# --- /dev/null
# +++ b/cuda_bindings/cuda/bindings/path_finder.py
# @@ -0,0 +1,37 @@
# Copyright 2024-2025 NVIDIA Corporation. All rights reserved.
#
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

# Public facade: re-exports the path-finding helpers implemented in the
# private ``cuda.bindings._path_finder`` package.
from cuda.bindings._path_finder.cuda_paths import (
    get_conda_ctk,
    get_conda_include_dir,
    get_cuda_home,
    get_cuda_paths,
    get_current_cuda_target_name,
    get_debian_pkg_libdevice,
    get_libdevice_wheel,
    get_nvidia_cudalib_ctk,
    get_nvidia_libdevice_ctk,
    get_nvidia_nvvm_ctk,
    get_nvidia_static_cudalib_ctk,
    get_system_ctk,
)
from cuda.bindings._path_finder.find_nvidia_dynamic_library import find_nvidia_dynamic_library
from cuda.bindings._path_finder.load_nvidia_dynamic_library import load_nvidia_dynamic_library

__all__ = [
    "find_nvidia_dynamic_library",
    "load_nvidia_dynamic_library",
    "get_conda_ctk",
    "get_conda_include_dir",
    "get_cuda_home",
    "get_cuda_paths",
    "get_current_cuda_target_name",
    "get_debian_pkg_libdevice",
    "get_libdevice_wheel",
    "get_nvidia_cudalib_ctk",
    "get_nvidia_libdevice_ctk",
    "get_nvidia_nvvm_ctk",
    "get_nvidia_static_cudalib_ctk",
    "get_system_ctk",
]
# diff --git a/cuda_bindings/tests/path_finder.py b/cuda_bindings/tests/path_finder.py
# new file mode 100644
# index 000000000..e9245a5be
# --- /dev/null
# +++ b/cuda_bindings/tests/path_finder.py
# @@ -0,0 +1,9 @@
# Diagnostic script: prints every entry reported by ``get_cuda_paths`` and
# the resolved locations of the nvvm and nvJitLink libraries.
from cuda.bindings import path_finder

paths = path_finder.get_cuda_paths()

for k in paths:
    v = paths[k]
    print(f"{k}: {v}", flush=True)

for lib_name in ("nvvm", "nvJitLink"):
    print(path_finder.find_nvidia_dynamic_library(lib_name))
# diff --git a/cuda_bindings/tests/test_sys_path_find_sub_dirs.py b/cuda_bindings/tests/test_sys_path_find_sub_dirs.py
# new file mode 100644
# index 000000000..3297ce39e
# --- /dev/null
# +++ b/cuda_bindings/tests/test_sys_path_find_sub_dirs.py
# @@ -0,0 +1,72 @@
import os

import pytest

from cuda.bindings._path_finder.sys_path_find_sub_dirs import _impl


@pytest.fixture
def test_tree(tmp_path):
    """Create a small directory tree and return search roots for ``_impl``.

    Layout under ``tmp_path``::

        sys1/nvidia/foo/lib
        sys1/nvidia/bar/lib
        sys2/nvidia/baz/nvvm/lib64

    The returned ``"sys_path"`` tuple also names a root that does not exist.
    """
    leaf_dirs = (
        ("sys1", "nvidia", "foo", "lib"),
        ("sys1", "nvidia", "bar", "lib"),
        ("sys2", "nvidia", "baz", "nvvm", "lib64"),
    )
    for parts in leaf_dirs:
        tmp_path.joinpath(*parts).mkdir(parents=True)

    # "nonexistent" is never created; the search is expected to ignore it.
    roots = tuple(str(tmp_path / root) for root in ("sys1", "sys2", "nonexistent"))
    return {"sys_path": roots, "base": tmp_path}


def test_exact_match(test_tree):
    """A fully literal component list finds exactly the one matching dir."""
    found = _impl(test_tree["sys_path"], ("nvidia", "foo", "lib"))
    wanted = test_tree["base"] / "sys1" / "nvidia" / "foo" / "lib"
    assert found == [str(wanted)]


def test_single_wildcard(test_tree):
    """``*`` in the middle matches every package dir that has a ``lib``."""
    nvidia_root = test_tree["base"] / "sys1" / "nvidia"
    found = _impl(test_tree["sys_path"], ("nvidia", "*", "lib"))
    wanted = [str(nvidia_root / pkg / "lib") for pkg in ("bar", "foo")]
    assert sorted(found) == sorted(wanted)


def test_double_wildcard(test_tree):
    """A wildcard followed by two literal components (``nvvm/lib64``)."""
    found = _impl(test_tree["sys_path"], ("nvidia", "*", "nvvm", "lib64"))
    wanted = test_tree["base"] / "sys2" / "nvidia" / "baz" / "nvvm" / "lib64"
    assert found == [str(wanted)]


def test_no_match(test_tree):
    """A literal component that exists nowhere yields an empty result."""
    found = _impl(test_tree["sys_path"], ("nvidia", "nonexistent", "lib"))
    assert found == []


def test_empty_sys_path():
    """With no search roots there is nothing to find."""
    assert _impl((), ("nvidia", "*", "lib")) == []


def test_empty_sub_dirs(test_tree):
    """An empty component list returns the roots that are directories."""
    roots = test_tree["sys_path"]
    found = _impl(roots, ())
    wanted = list(filter(os.path.isdir, roots))
    assert sorted(found) == sorted(wanted)