diff --git a/.github/actions/fetch_ctk/action.yml b/.github/actions/fetch_ctk/action.yml
index 669943296..5850b4c78 100644
--- a/.github/actions/fetch_ctk/action.yml
+++ b/.github/actions/fetch_ctk/action.yml
@@ -123,4 +123,4 @@ runs:
         echo "CUDA_PATH=${CUDA_PATH}" >> $GITHUB_ENV
         echo "CUDA_HOME=${CUDA_PATH}" >> $GITHUB_ENV
         echo "${CUDA_PATH}/bin" >> $GITHUB_PATH
-        echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-}:${CUDA_PATH}/lib:${CUDA_PATH}/nvvm/lib64" >> $GITHUB_ENV
+        echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-}:${CUDA_PATH}/lib" >> $GITHUB_ENV
diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml
index 4e48590a3..948d2fae6 100644
--- a/.github/workflows/test-wheel-windows.yml
+++ b/.github/workflows/test-wheel-windows.yml
@@ -164,13 +164,6 @@ jobs:
           method: 'network'
           sub-packages: ${{ env.MINI_CTK_DEPS }}
 
-      - name: Update PATH
-        if: ${{ inputs.local-ctk == '1' }}
-        run: |
-          # mimics actual CTK installation
-          echo $PATH
-          echo "$env:CUDA_PATH\nvvm\bin" >> $env:GITHUB_PATH
-
       - name: Run cuda.bindings tests
         if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }}
         run: |
diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx
index 9961a2105..9d21a3e10 100644
--- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx
+++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx
@@ -4,12 +4,12 @@
 #
 # This code was automatically generated across versions from 12.0.1 to 12.8.0. Do not modify it directly.
 
-from libc.stdint cimport intptr_t
-
-from .utils cimport get_nvjitlink_dso_version_suffix
+from libc.stdint cimport intptr_t, uintptr_t
 
 from .utils import FunctionNotFoundError, NotSupportedError
 
+from cuda.bindings import path_finder
+
 ###############################################################################
 # Extern
 ###############################################################################
@@ -52,17 +52,9 @@ cdef void* __nvJitLinkGetInfoLog = NULL
 cdef void* __nvJitLinkVersion = NULL
 
 
-cdef void* load_library(const int driver_ver) except* with gil:
-    cdef void* handle
-    for suffix in get_nvjitlink_dso_version_suffix(driver_ver):
-        so_name = "libnvJitLink.so" + (f".{suffix}" if suffix else suffix)
-        handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL)
-        if handle != NULL:
-            break
-    else:
-        err_msg = dlerror()
-        raise RuntimeError(f'Failed to dlopen libnvJitLink ({err_msg.decode()})')
-    return handle
+cdef void* load_library(int driver_ver) except* with gil:
+    # driver_ver is not consulted; the handle is whatever path_finder returns.
+    return <void*><uintptr_t>path_finder.load_nvidia_dynamic_library("nvJitLink")
 
 
 cdef int _check_or_init_nvjitlink() except -1 nogil:
diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx
index c8c7e6b29..f86972216 100644
--- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx
+++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx
@@ -6,12 +6,9 @@
 
 from libc.stdint cimport intptr_t
 
-from .utils cimport get_nvjitlink_dso_version_suffix
-
 from .utils import FunctionNotFoundError, NotSupportedError
 
-import os
-import site
+from cuda.bindings import path_finder
 
 import win32api
 
@@ -42,54 +39,9 @@ cdef void* __nvJitLinkGetInfoLog = NULL
 cdef void* __nvJitLinkVersion = NULL
 
 
-cdef inline list get_site_packages():
-    return [site.getusersitepackages()] + site.getsitepackages()
-
-
-cdef load_library(const int driver_ver):
-    handle = 0
-
-    for suffix in get_nvjitlink_dso_version_suffix(driver_ver):
-        if len(suffix) == 0:
-            continue
-        dll_name = f"nvJitLink_{suffix}0_0.dll"
-
-        # First check if the DLL has been loaded by 3rd parties
-        try:
-            handle = win32api.GetModuleHandle(dll_name)
-        except:
-            pass
-        else:
-            break
-
-        # Next, check if DLLs are installed via pip
-        for sp in get_site_packages():
-            mod_path = os.path.join(sp, "nvidia", "nvJitLink", "bin")
-            if not os.path.isdir(mod_path):
-                continue
-            os.add_dll_directory(mod_path)
-        try:
-            handle = win32api.LoadLibraryEx(
-                # Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path...
-                os.path.join(mod_path, dll_name),
-                0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR)
-        except:
-            pass
-        else:
-            break
-
-        # Finally, try default search
-        try:
-            handle = win32api.LoadLibrary(dll_name)
-        except:
-            pass
-        else:
-            break
-    else:
-        raise RuntimeError('Failed to load nvJitLink')
-
-    assert handle != 0
-    return handle
+cdef void* load_library(int driver_ver) except* with gil:
+    # driver_ver is not consulted; the handle is whatever path_finder returns.
+    return <void*><intptr_t>path_finder.load_nvidia_dynamic_library("nvJitLink")
 
 
 cdef int _check_or_init_nvjitlink() except -1 nogil:
@@ -98,15 +50,16 @@ cdef int _check_or_init_nvjitlink() except -1 nogil:
         return 0
 
     cdef int err, driver_ver
+    cdef intptr_t handle
     with gil:
         # Load driver to check version
         try:
-            handle = win32api.LoadLibraryEx("nvcuda.dll", 0, LOAD_LIBRARY_SEARCH_SYSTEM32)
+            nvcuda_handle = win32api.LoadLibraryEx("nvcuda.dll", 0, LOAD_LIBRARY_SEARCH_SYSTEM32)
         except Exception as e:
             raise NotSupportedError(f'CUDA driver is not found ({e})')
         global __cuDriverGetVersion
         if __cuDriverGetVersion == NULL:
-            __cuDriverGetVersion = <void*><intptr_t>win32api.GetProcAddress(handle, 'cuDriverGetVersion')
+            __cuDriverGetVersion = <void*><intptr_t>win32api.GetProcAddress(nvcuda_handle, 'cuDriverGetVersion')
             if __cuDriverGetVersion == NULL:
                 raise RuntimeError('something went wrong')
         err = (<int (*)(int*) noexcept nogil>__cuDriverGetVersion)(&driver_ver)
@@ -114,7 +67,7 @@ cdef int _check_or_init_nvjitlink() except -1 nogil:
             raise RuntimeError('something went wrong')
 
         # Load library
-        handle = load_library(driver_ver)
+        handle = <intptr_t>load_library(driver_ver)
 
         # Load function
         global __nvJitLinkCreate
diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx
index 64e78e75a..33ba8e610 100644
--- a/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx
+++ b/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx
@@ -4,12 +4,12 @@
 #
 # This code was automatically generated across versions from 11.0.3 to 12.8.0. Do not modify it directly.
 
-from libc.stdint cimport intptr_t
-
-from .utils cimport get_nvvm_dso_version_suffix
+from libc.stdint cimport intptr_t, uintptr_t
 
 from .utils import FunctionNotFoundError, NotSupportedError
 
+from cuda.bindings import path_finder
+
 ###############################################################################
 # Extern
 ###############################################################################
@@ -51,16 +51,8 @@ cdef void* __nvvmGetProgramLog = NULL
 
 
 cdef void* load_library(const int driver_ver) except* with gil:
-    cdef void* handle
-    for suffix in get_nvvm_dso_version_suffix(driver_ver):
-        so_name = "libnvvm.so" + (f".{suffix}" if suffix else suffix)
-        handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL)
-        if handle != NULL:
-            break
-    else:
-        err_msg = dlerror()
-        raise RuntimeError(f'Failed to dlopen libnvvm ({err_msg.decode()})')
-    return handle
+    cdef uintptr_t handle = path_finder.load_nvidia_dynamic_library("nvvm")
+    return <void*>handle
 
 
 cdef int _check_or_init_nvvm() except -1 nogil:
diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx
index 76ce23254..6349fa5a1 100644
--- a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx
+++ b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx
@@ -6,12 +6,9 @@
 
 from libc.stdint cimport intptr_t
 
-from .utils cimport get_nvvm_dso_version_suffix
-
 from .utils import FunctionNotFoundError, NotSupportedError
 
-import os
-import site
+from cuda.bindings import path_finder
 
 import win32api
 
@@ -40,54 +37,9 @@ cdef void* __nvvmGetProgramLogSize = NULL
 cdef void* __nvvmGetProgramLog = NULL
 
 
-cdef inline list get_site_packages():
-    return [site.getusersitepackages()] + site.getsitepackages()
-
-
-cdef load_library(const int driver_ver):
-    handle = 0
-
-    for suffix in get_nvvm_dso_version_suffix(driver_ver):
-        if len(suffix) == 0:
-            continue
-        dll_name = "nvvm64_40_0"
-
-        # First check if the DLL has been loaded by 3rd parties
-        try:
-            handle = win32api.GetModuleHandle(dll_name)
-        except:
-            pass
-        else:
-            break
-
-        # Next, check if DLLs are installed via pip
-        for sp in get_site_packages():
-            mod_path = os.path.join(sp, "nvidia", "cuda_nvcc", "nvvm", "bin")
-            if not os.path.isdir(mod_path):
-                continue
-            os.add_dll_directory(mod_path)
-        try:
-            handle = win32api.LoadLibraryEx(
-                # Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path...
-                os.path.join(mod_path, dll_name),
-                0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR)
-        except:
-            pass
-        else:
-            break
-
-        # Finally, try default search
-        try:
-            handle = win32api.LoadLibrary(dll_name)
-        except:
-            pass
-        else:
-            break
-    else:
-        raise RuntimeError('Failed to load nvvm')
-
-    assert handle != 0
-    return handle
+cdef void* load_library(int driver_ver) except* with gil:
+    # driver_ver is not consulted; the handle is whatever path_finder returns.
+    return <void*><intptr_t>path_finder.load_nvidia_dynamic_library("nvvm")
 
 
 cdef int _check_or_init_nvvm() except -1 nogil:
@@ -96,15 +48,16 @@ cdef int _check_or_init_nvvm() except -1 nogil:
         return 0
 
     cdef int err, driver_ver
+    cdef intptr_t handle
     with gil:
         # Load driver to check version
         try:
-            handle = win32api.LoadLibraryEx("nvcuda.dll", 0, LOAD_LIBRARY_SEARCH_SYSTEM32)
+            nvcuda_handle = win32api.LoadLibraryEx("nvcuda.dll", 0, LOAD_LIBRARY_SEARCH_SYSTEM32)
         except Exception as e:
             raise NotSupportedError(f'CUDA driver is not found ({e})')
         global __cuDriverGetVersion
         if __cuDriverGetVersion == NULL:
-            __cuDriverGetVersion = <void*><intptr_t>win32api.GetProcAddress(handle, 'cuDriverGetVersion')
+            __cuDriverGetVersion = <void*><intptr_t>win32api.GetProcAddress(nvcuda_handle, 'cuDriverGetVersion')
             if __cuDriverGetVersion == NULL:
                 raise RuntimeError('something went wrong')
         err = (<int (*)(int*) noexcept nogil>__cuDriverGetVersion)(&driver_ver)
@@ -112,7 +65,7 @@ cdef int _check_or_init_nvvm() except -1 nogil:
             raise RuntimeError('something went wrong')
 
         # Load library
-        handle = load_library(driver_ver)
+        handle = <intptr_t>load_library(driver_ver)
 
         # Load function
         global __nvvmVersion
diff --git a/cuda_bindings/cuda/bindings/_internal/utils.pxd b/cuda_bindings/cuda/bindings/_internal/utils.pxd
index cac7846ff..a4b71c531 100644
--- a/cuda_bindings/cuda/bindings/_internal/utils.pxd
+++ b/cuda_bindings/cuda/bindings/_internal/utils.pxd
@@ -165,6 +165,3 @@ cdef int get_nested_resource_ptr(nested_resource[ResT] &in_out_ptr, object obj,
 
 cdef bint is_nested_sequence(data)
 cdef void* get_buffer_pointer(buf, Py_ssize_t size, readonly=*) except*
-
-cdef tuple get_nvjitlink_dso_version_suffix(int driver_ver)
-cdef tuple get_nvvm_dso_version_suffix(int driver_ver)
diff --git a/cuda_bindings/cuda/bindings/_internal/utils.pyx b/cuda_bindings/cuda/bindings/_internal/utils.pyx
index 0a693c052..7fc77b22c 100644
--- a/cuda_bindings/cuda/bindings/_internal/utils.pyx
+++ b/cuda_bindings/cuda/bindings/_internal/utils.pyx
@@ -127,17 +127,3 @@ cdef int get_nested_resource_ptr(nested_resource[ResT] &in_out_ptr, object obj,
 class FunctionNotFoundError(RuntimeError): pass
 
 class NotSupportedError(RuntimeError): pass
-
-
-cdef tuple get_nvjitlink_dso_version_suffix(int driver_ver):
-    if 12000 <= driver_ver < 13000:
-        return ('12', '')
-    raise NotSupportedError(f'CUDA driver version {driver_ver} is not supported')
-
-
-cdef tuple get_nvvm_dso_version_suffix(int driver_ver):
-    if 11000 <= driver_ver < 11020:
-        return ('3', '')
-    if 11020 <= driver_ver < 13000:
-        return ('4', '')
-    raise NotSupportedError(f'CUDA driver version {driver_ver} is not supported')
diff --git a/cuda_bindings/cuda/bindings/_path_finder/cuda_paths.py b/cuda_bindings/cuda/bindings/_path_finder/cuda_paths.py
new file mode 100644
index 000000000..e27e6f54b
--- /dev/null
+++ b/cuda_bindings/cuda/bindings/_path_finder/cuda_paths.py
@@ -0,0 +1,403 @@
+import os
+import platform
+import re
+import site
+import sys
+import traceback
+import warnings
+from collections import namedtuple
+from pathlib import Path
+
+from .findlib import find_file, find_lib
+
+IS_WIN32 = sys.platform.startswith("win32")
+
+_env_path_tuple = namedtuple("_env_path_tuple", ["by", "info"])
+
+
+def _get_numba_CUDA_INCLUDE_PATH():
+    """Return the CUDA include path the way numba/numba/core/config.py does.
+
+    NUMBA_CUDA_INCLUDE_PATH wins when set; otherwise a platform default is used.
+    """
+
+    def _readenv(name, ctor, default):
+        value = os.environ.get(name)
+        if value is None:
+            return default() if callable(default) else default
+        try:
+            return ctor(value)
+        except Exception:
+            warnings.warn(  # noqa: B028
+                f"Environment variable '{name}' is defined but "
+                f"its associated value '{value}' could not be "
+                "parsed.\nThe parse failed with exception:\n"
+                f"{traceback.format_exc()}",
+                RuntimeWarning,
+            )
+            return default
+
+    if not IS_WIN32:
+        fallback = os.path.join(os.sep, "usr", "local", "cuda", "include")
+    elif cuda_path := os.environ.get("CUDA_PATH"):
+        fallback = os.path.join(cuda_path, "include")
+    else:
+        fallback = "cuda_include_not_found"
+    return _readenv("NUMBA_CUDA_INCLUDE_PATH", str, fallback)
+
+
+config_CUDA_INCLUDE_PATH = _get_numba_CUDA_INCLUDE_PATH()
+
+
+def _find_valid_path(options):
+    """Pick the first usable entry of *options*.
+
+    *options* is an iterable of (name, path) 2-tuples.  The first pair whose
+    *path* is not None is returned; when there is no such pair the result
+    is ('<unknown>', None).
+    """
+    # A lazy filter, so scanning stops at the first hit.
+    usable = ((by, data) for by, data in options if data is not None)
+    return next(usable, ("<unknown>", None))
+
+
+def _get_libdevice_path_decision():
+    """Return (by, directory) for the first location that provides libdevice."""
+    candidates = [
+        ("Conda environment", get_conda_ctk()),
+        ("Conda environment (NVIDIA package)", get_nvidia_libdevice_ctk()),
+        ("CUDA_HOME", get_cuda_home("nvvm", "libdevice")),
+        ("Debian package", get_debian_pkg_libdevice()),
+        ("NVIDIA NVCC Wheel", get_libdevice_wheel()),
+    ]
+    # The system toolkit only counts when its libdevice directory exists.
+    system_dir = get_system_ctk("nvvm", "libdevice")
+    if system_dir and os.path.exists(system_dir):
+        candidates.append(("System", system_dir))
+    return _find_valid_path(candidates)
+
+
+def _nvvm_lib_dir():
+    """Return the two path components of the NVVM library directory."""
+    # "nvvm/bin" when IS_WIN32 is set, "nvvm/lib64" otherwise.
+    leaf = "bin" if IS_WIN32 else "lib64"
+    return "nvvm", leaf
+
+
+def _get_nvvm_path_decision():
+    """Return (by, directory) for the first location that provides NVVM."""
+    nvvm_subdirs = _nvvm_lib_dir()
+    candidates = [
+        ("Conda environment", get_conda_ctk()),
+        ("Conda environment (NVIDIA package)", get_nvidia_nvvm_ctk()),
+        ("CUDA_HOME", get_cuda_home(*nvvm_subdirs)),
+        ("NVIDIA NVCC Wheel", _get_nvvm_wheel()),
+    ]
+    # need to ensure nvvm dir actually exists
+    system_dir = get_system_ctk(*nvvm_subdirs)
+    if system_dir and os.path.exists(system_dir):
+        candidates.append(("System", system_dir))
+    return _find_valid_path(candidates)
+
+
+def _get_nvvm_wheel():
+    """Return the NVVM library directory of an NVIDIA NVCC wheel, or None.
+
+    Each candidate directory is probed for nvidia/cuda_nvcc/nvvm/<dir>/<library>.
+    """
+    site_paths = [site.getusersitepackages()] + site.getsitepackages() + ["conda", None]
+    # The SONAME is taken based on public CTK 12.x releases
+    if sys.platform.startswith("linux"):
+        # Hack: libnvvm from Linux wheel
+        # does not have any soname (CUDAINST-3183)
+        leaf_dir, dso_name = "lib64", "libnvvm.so"
+    elif sys.platform.startswith("win32"):
+        leaf_dir, dso_name = "bin", "nvvm64_40_0.dll"
+    else:
+        raise AssertionError()
+    for sp in (path for path in site_paths if path is not None):
+        dso_path = os.path.join(sp, "nvidia", "cuda_nvcc", "nvvm", leaf_dir, dso_name)
+        if os.path.exists(dso_path):
+            return str(Path(dso_path).parent)
+    return None
+
+
+def _get_libdevice_paths():
+    """Return _env_path_tuple(by, path of the libdevice bitcode file or None)."""
+    by, libdir = _get_libdevice_path_decision()
+    if by == "NVIDIA NVCC Wheel":
+        # The decision is a directory, not a file: append the bitcode
+        # file name used for the wheel.
+        return _env_path_tuple(by, os.path.join(libdir, "libdevice.10.bc"))
+    # Otherwise search the directory for libdevice[.N...].bc files and
+    # keep only the max of the matching paths.
+    bitcode_re = re.compile(r"libdevice(\.\d+)*\.bc$")
+    newest = max(find_file(bitcode_re, libdir), default=None)
+    return _env_path_tuple(by, newest)
+
+
+def _cudalib_path():
+    """Return the name of the toolkit sub-directory used for shared libraries."""
+    # "bin" when IS_WIN32 is set, "lib64" otherwise.
+    subdir = "bin" if IS_WIN32 else "lib64"
+    return subdir
+
+
+def _cuda_home_static_cudalib_path():
+    """Return the path components of the static-library dir below CUDA_HOME."""
+    # ("lib", "x64") when IS_WIN32 is set, ("lib64",) otherwise.
+    components = ("lib", "x64") if IS_WIN32 else ("lib64",)
+    return components
+
+
+def _get_cudalib_dir_path_decision():
+    """Return (by, directory) for the first location providing the CUDA shared libraries."""
+    candidates = [
+        ("Conda environment", get_conda_ctk()),
+        ("Conda environment (NVIDIA package)", get_nvidia_cudalib_ctk()),
+        ("CUDA_HOME", get_cuda_home(_cudalib_path())),
+        ("System", get_system_ctk(_cudalib_path())),
+    ]
+    return _find_valid_path(candidates)
+
+
+def _get_static_cudalib_dir_path_decision():
+    """Return (by, directory) for the first location providing the CUDA static libraries."""
+    candidates = [
+        ("Conda environment", get_conda_ctk()),
+        ("Conda environment (NVIDIA package)", get_nvidia_static_cudalib_ctk()),
+        ("CUDA_HOME", get_cuda_home(*_cuda_home_static_cudalib_path())),
+        ("System", get_system_ctk(_cudalib_path())),
+    ]
+    return _find_valid_path(candidates)
+
+
+def _get_cudalib_dir():
+    """Return the shared-library directory decision wrapped as an _env_path_tuple."""
+    return _env_path_tuple(*_get_cudalib_dir_path_decision())
+
+
+def _get_static_cudalib_dir():
+    """Return the static-library directory decision wrapped as an _env_path_tuple."""
+    return _env_path_tuple(*_get_static_cudalib_dir_path_decision())
+
+
+def get_system_ctk(*subdirs):
+    """Return *subdirs* joined below the system-wide CUDA toolkit, or None if there is none."""
+    # Only Linux is handled, and only the unversioned /usr/local/cuda alias:
+    # versioned installations are intentionally not considered.
+    on_linux = sys.platform.startswith("linux")
+    if not on_linux or not os.path.exists("/usr/local/cuda"):
+        return None
+    toolkit_root = "/usr/local/cuda"
+    return os.path.join(toolkit_root, *subdirs)
+
+
+def get_conda_ctk():
+    """Return the directory holding the conda cudatoolkit shared libraries, or None."""
+    # Without a "conda-meta" directory under sys.prefix there is nothing to look for.
+    if not os.path.exists(os.path.join(sys.prefix, "conda-meta")):
+        return None
+    # The presence of NVVM is taken to imply that cudatoolkit is installed.
+    nvvm_libs = find_lib("nvvm")
+    if nvvm_libs:
+        # Use the directory name of the max path
+        return os.path.dirname(max(nvvm_libs))
+    return None
+
+
+def get_nvidia_nvvm_ctk():
+    """Return the directory holding the NVVM shared library of a conda env, or None.
+
+    None is returned outside a conda environment, or when no NVVM library
+    is found in the locations used by the NVIDIA conda packages.
+    """
+    if not os.path.exists(os.path.join(sys.prefix, "conda-meta")):
+        return None
+
+    # Assume the existence of NVVM in the conda env implies that a CUDA toolkit
+    # conda package is installed.
+
+    # Roots, in order: the location used on Linux and by the Windows 11.x
+    # packages, then the location used for Windows 12.x packages.
+    for root in (sys.prefix, os.path.join(sys.prefix, "Library")):
+        libdir = os.path.join(root, "nvvm", _cudalib_path())
+        if os.path.exists(libdir) and os.path.isdir(libdir):
+            break
+    else:
+        # Neither exists: assume we don't have the NVIDIA conda package
+        return None
+
+    nvvm_libs = find_lib("nvvm", libdir=libdir)
+    return os.path.dirname(max(nvvm_libs)) if nvvm_libs else None
+
+
+def get_nvidia_libdevice_ctk():
+    """Return the "libdevice" directory next to the conda NVVM library dir, or None."""
+    nvvm_lib_dir = get_nvidia_nvvm_ctk()
+    if nvvm_lib_dir:
+        # "libdevice" is joined onto the parent of the NVVM library directory.
+        return os.path.join(os.path.dirname(nvvm_lib_dir), "libdevice")
+    return None
+
+
+def get_nvidia_cudalib_ctk():
+    """Return the shared-library directory of the conda NVIDIA packages, or None."""
+    nvvm_lib_dir = get_nvidia_nvvm_ctk()
+    if not nvvm_lib_dir:
+        return None
+    # Go two directory levels up from the NVVM library directory.
+    env_root = os.path.dirname(os.path.dirname(nvvm_lib_dir))
+    return os.path.join(env_root, "bin" if IS_WIN32 else "lib")
+
+
+def get_nvidia_static_cudalib_ctk():
+    """Return the static-library directory of the conda NVIDIA packages, or None.
+
+    None is returned when get_nvidia_nvvm_ctk() finds no NVVM library directory.
+    """
+    nvvm_lib_dir = get_nvidia_nvvm_ctk()
+    if not nvvm_lib_dir:
+        return None
+    # Go two directory levels up from the NVVM library directory.
+    env_root = os.path.dirname(os.path.dirname(nvvm_lib_dir))
+    if IS_WIN32 and "Library" not in nvvm_lib_dir:
+        # Location specific to CUDA 11.x packages on Windows
+        return os.path.join(env_root, "Lib", "x64")
+    # Linux, or Windows with CUDA 12.x packages
+    return os.path.join(env_root, "lib")
+
+
+def get_cuda_home(*subdirs):
+    """Return *subdirs* joined below CUDA_HOME, or None when it is not set.
+
+    CUDA_PATH (Windows CUDA installation without Anaconda) is used as the
+    fallback when CUDA_HOME is absent.
+    """
+    for variable in ("CUDA_HOME", "CUDA_PATH"):
+        root = os.environ.get(variable)
+        if root is not None:
+            return os.path.join(root, *subdirs)
+    return None
+
+
+def _get_nvvm_path():
+    """Return _env_path_tuple(by, path of the NVVM shared library or None)."""
+    by, path = _get_nvvm_path_decision()
+    if by == "NVIDIA NVCC Wheel":
+        # The wheel decision is a directory: append the file name that _get_nvvm_wheel() matched.
+        path = os.path.join(path, "nvvm64_40_0.dll" if IS_WIN32 else "libnvvm.so")
+        return _env_path_tuple(by, path)
+    candidates = find_lib("nvvm", path)
+    return _env_path_tuple(by, max(candidates) if candidates else None)
+
+
+def get_cuda_paths():
+    """Return a dict mapping component names to (by, info) named 2-tuples.
+
+    *by* names the source that supplied the path; *info* is the path itself:
+    - "nvvm": file_path
+    - "libdevice": file_path
+    - "cudalib_dir", "static_cudalib_dir", "include_dir": directory_path
+
+    Note: the result is computed on the first call and cached as an
+    attribute of this function; later calls return the same dict object.
+    """
+    # Check cache
+    if hasattr(get_cuda_paths, "_cached_result"):
+        return get_cuda_paths._cached_result
+    else:
+        # Not in cache: run every lookup once
+        d = {
+            "nvvm": _get_nvvm_path(),
+            "libdevice": _get_libdevice_paths(),
+            "cudalib_dir": _get_cudalib_dir(),
+            "static_cudalib_dir": _get_static_cudalib_dir(),
+            "include_dir": _get_include_dir(),
+        }
+        # Cache result
+        get_cuda_paths._cached_result = d
+        return d
+
+
+def get_debian_pkg_libdevice():
+    """Return the libdevice directory installed by the Debian package, or None.
+
+    The Debian NVIDIA Maintainers package uses one fixed location, so only
+    that path is probed.
+    """
+    debian_dir = "/usr/lib/nvidia-cuda-toolkit/libdevice"
+    found = os.path.exists(debian_dir)
+    return debian_dir if found else None
+
+
+def get_libdevice_wheel():
+    """Return the libdevice directory of the NVIDIA NVCC wheel, or None if no wheel is found."""
+    wheel_nvvm_dir = _get_nvvm_wheel()
+    if wheel_nvvm_dir is not None:
+        # "libdevice" is taken as a sibling of the NVVM library directory.
+        sibling = Path(wheel_nvvm_dir).parent / "libdevice"
+        return str(sibling)
+    return None
+
+
+def get_current_cuda_target_name():
+    """Determine conda's CTK target folder based on system and machine arch.
+
+    CTK's conda package delivers headers based on its architecture type. For example,
+    an `x86_64` machine places headers under `$CONDA_PREFIX/targets/x86_64-linux`, and
+    `aarch64` places them under `$CONDA_PREFIX/targets/sbsa-linux`. Read more about the
+    nuances at cudart's conda feedstock:
+    https://github.com/conda-forge/cuda-cudart-feedstock/blob/main/recipe/meta.yaml#L8-L11  # noqa: E501
+
+    Returns the target name, or None for an unrecognized system/machine pair.
+    """
+    # Known targets, keyed first by platform.system() and then by
+    # platform.machine().
+    targets_by_system = {
+        "Linux": {"x86_64": "x86_64-linux", "aarch64": "sbsa-linux"},
+        "Windows": {"AMD64": "x64"},
+    }
+    system = platform.system()
+    machine = platform.machine()
+    # Unknown systems get an empty table, so the final lookup yields None.
+    arch_to_targets = targets_by_system.get(system, {})
+    return arch_to_targets.get(machine)
+
+
+def get_conda_include_dir():
+    """Return the CUDA include directory of the current conda environment, or None.
+
+    None is returned outside a conda environment, or when the directory or
+    its "cuda_device_runtime_api.h" header does not exist.
+    """
+    prefix = sys.prefix
+    if not os.path.exists(os.path.join(prefix, "conda-meta")):
+        return None
+
+    if platform.system() == "Windows":
+        relative = ("Library", "include")
+    else:
+        target_name = get_current_cuda_target_name()
+        # Fall back to the plain "include" directory when the target cannot
+        # be determined, though usually that should not happen.
+        relative = ("targets", target_name, "include") if target_name else ("include",)
+    include_dir = os.path.join(prefix, *relative)
+
+    # The directory only counts if it contains this header.
+    header = os.path.join(include_dir, "cuda_device_runtime_api.h")
+    usable = os.path.exists(include_dir) and os.path.isdir(include_dir) and os.path.exists(header)
+    if not usable:
+        return None
+    return include_dir
+
+
+def _get_include_dir():
+    """Return _env_path_tuple(by, dir) for the first available CUDA include directory."""
+    # Listed in priority order: the conda lookup first, then the config entry.
+    conda_entry = ("Conda environment (NVIDIA package)", get_conda_include_dir())
+    config_entry = ("CUDA_INCLUDE_PATH Config Entry", config_CUDA_INCLUDE_PATH)
+    # TODO: add others
+    source, directory = _find_valid_path([conda_entry, config_entry])
+    chosen = _env_path_tuple(source, directory)
+    return chosen
diff --git a/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py
new file mode 100644
index 000000000..30a9b68f4
--- /dev/null
+++ b/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py
@@ -0,0 +1,139 @@
+# Copyright 2024-2025 NVIDIA Corporation.  All rights reserved.
+#
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
+
+import functools
+import glob
+import os
+
+from .cuda_paths import IS_WIN32, get_cuda_paths
+from .sys_path_find_sub_dirs import sys_path_find_sub_dirs
+
+
+def _no_such_file_in_sub_dirs(sub_dirs, file_wild, error_messages, attachments):
+    """Record that *file_wild* was not found and attach a listing of each searched directory."""
+    error_messages.append(f"No such file: {file_wild}")
+    for sub_dir in sys_path_find_sub_dirs(sub_dirs):
+        attachments.append(f'  listdir("{sub_dir}"):')
+        attachments.extend(f"    {node}" for node in sorted(os.listdir(sub_dir)))
+
+
+def _find_so_using_nvidia_lib_dirs(libname, so_basename, error_messages, attachments):
+    """Return the path of *so_basename* found via sys_path_find_sub_dirs(), or None (with diagnostics)."""
+    if libname == "nvvm":  # noqa: SIM108
+        nvidia_sub_dirs = ("nvidia", "*", "nvvm", "lib64")
+    else:
+        nvidia_sub_dirs = ("nvidia", "*", "lib")
+    file_wild = so_basename + "*"
+    for lib_dir in sys_path_find_sub_dirs(nvidia_sub_dirs):
+        # First look for an exact match
+        so_name = os.path.join(lib_dir, so_basename)
+        if os.path.isfile(so_name):
+            return so_name
+        # Then look for a versioned library. glob results already include
+        # lib_dir (no re-join needed); sorting keeps the choice deterministic.
+        for so_name in sorted(glob.glob(os.path.join(lib_dir, file_wild))):
+            if os.path.isfile(so_name):
+                return so_name
+    _no_such_file_in_sub_dirs(nvidia_sub_dirs, file_wild, error_messages, attachments)
+    return None
+
+
+def _find_dll_using_nvidia_bin_dirs(libname, error_messages, attachments):
+    """Return the first ``<libname>*.dll`` found via sys_path_find_sub_dirs(), or None (with diagnostics)."""
+    if libname == "nvvm":  # noqa: SIM108
+        nvidia_sub_dirs = ("nvidia", "*", "nvvm", "bin")
+    else:
+        nvidia_sub_dirs = ("nvidia", "*", "bin")
+    file_wild = libname + "*.dll"
+    for bin_dir in sys_path_find_sub_dirs(nvidia_sub_dirs):
+        for dll_name in sorted(glob.glob(os.path.join(bin_dir, file_wild))):
+            if os.path.isfile(dll_name):
+                return dll_name
+    _no_such_file_in_sub_dirs(nvidia_sub_dirs, file_wild, error_messages, attachments)
+    return None
+
+
+def _get_cuda_paths_info(key, error_messages):
+    """Return get_cuda_paths()[key].info; if the entry or its info is falsy, log that and return None."""
+    entry = get_cuda_paths()[key]
+    if not entry:
+        error_messages.append(f'Failure obtaining get_cuda_paths()["{key}"]')
+    elif not entry.info:
+        error_messages.append(f'Failure obtaining get_cuda_paths()["{key}"].info')
+    else:
+        return entry.info
+
+
+def _find_so_using_cudalib_dir(so_basename, error_messages, attachments):
+    """Return the path of *so_basename* in the CTK lib dir (or its lib/lib64 sibling), else None.
+
+    Failure details are appended to the caller's *error_messages* and *attachments* lists.
+    """
+    cudalib_dir = _get_cuda_paths_info("cudalib_dir", error_messages)
+    if cudalib_dir is None:
+        return None
+    primary_so_dir = cudalib_dir + "/"
+    candidate_so_dirs = [primary_so_dir]
+    for old, new in (("/lib64/", "/lib/"), ("/lib/", "/lib64/")):
+        alt_dir = new.join(primary_so_dir.rsplit(old, 1))
+        if alt_dir not in candidate_so_dirs:
+            candidate_so_dirs.append(alt_dir)
+    for so_name in (so_dirname + so_basename for so_dirname in candidate_so_dirs):
+        if os.path.isfile(so_name):
+            return so_name
+        error_messages.append(f"No such file: {so_name}")
+    for so_dirname in candidate_so_dirs:
+        attachments.append(f'  listdir("{so_dirname}"):')
+        if not os.path.isdir(so_dirname):
+            attachments.append("    DIRECTORY DOES NOT EXIST")
+        else:
+            for node in sorted(os.listdir(so_dirname)):
+                attachments.append(f"    {node}")
+    return None
+
+
+def _find_dll_using_cudalib_dir(libname, error_messages, attachments):
+    """Return the first ``<libname>*.dll`` in the CTK library dir, else None; a missing dir is only reported."""
+    cudalib_dir = _get_cuda_paths_info("cudalib_dir", error_messages)
+    if cudalib_dir is None:
+        return None
+    file_wild = libname + "*.dll"
+    for dll_name in sorted(glob.glob(os.path.join(cudalib_dir, file_wild))):
+        if os.path.isfile(dll_name):
+            return dll_name
+    error_messages.append(f"No such file: {file_wild}")
+    attachments.append(f'  listdir("{cudalib_dir}"):')
+    listing = sorted(os.listdir(cudalib_dir)) if os.path.isdir(cudalib_dir) else ["DIRECTORY DOES NOT EXIST"]
+    attachments.extend(f"    {node}" for node in listing)
+    return None
+
+
+@functools.cache
+def find_nvidia_dynamic_library(name: str) -> str:
+    """Return the path of the NVIDIA dynamic library *name*.
+
+    Raises RuntimeError, carrying the collected diagnostics, if it is not found.
+    """
+    error_messages = []
+    attachments = []
+    if IS_WIN32:
+        dll_name = _find_dll_using_nvidia_bin_dirs(name, error_messages, attachments)
+        if dll_name is None and name == "nvvm":
+            dll_name = _get_cuda_paths_info("nvvm", error_messages)
+        elif dll_name is None:
+            dll_name = _find_dll_using_cudalib_dir(name, error_messages, attachments)
+        if dll_name is not None:
+            return dll_name
+        attachments = "\n".join(attachments)
+        raise RuntimeError(f"Failure finding {name}*.dll: {', '.join(error_messages)}\n{attachments}")
+    so_basename = f"lib{name}.so"
+    so_name = _find_so_using_nvidia_lib_dirs(name, so_basename, error_messages, attachments)
+    if so_name is None and name == "nvvm":
+        so_name = _get_cuda_paths_info("nvvm", error_messages)
+    elif so_name is None:
+        so_name = _find_so_using_cudalib_dir(so_basename, error_messages, attachments)
+    if so_name is not None:
+        return so_name
+    attachments = "\n".join(attachments)
+    raise RuntimeError(f"Failure finding {so_basename}: {', '.join(error_messages)}\n{attachments}")
diff --git a/cuda_bindings/cuda/bindings/_path_finder/findlib.py b/cuda_bindings/cuda/bindings/_path_finder/findlib.py
new file mode 100644
index 000000000..4de57c905
--- /dev/null
+++ b/cuda_bindings/cuda/bindings/_path_finder/findlib.py
@@ -0,0 +1,69 @@
+# Forked from:
+# https://github.com/numba/numba/blob/f0d24824fcd6a454827e3c108882395d00befc04/numba/misc/findlib.py
+
+import os
+import re
+import sys
+
+
+def get_lib_dirs():
+    """Return the library directories under ``sys.prefix`` (Anaconda specific).
+
+    Windows yields ``DLLs`` and ``bin`` inside ``Library``; other platforms yield ``lib``.
+    """
+    prefix = sys.prefix
+    if sys.platform != "win32":
+        return [os.path.join(prefix, "lib")]
+    # On Windows, `DLLs` was historically used for CUDA libraries; since
+    # approximately CUDA 9.2, `Library\bin` has been used.
+    return [
+        os.path.join(prefix, "DLLs"),
+        os.path.join(prefix, "Library", "bin"),
+    ]
+
+
+DLLNAMEMAP = {  # platform key -> %-style regex template; find_lib fills in "name" and "ver"
+    "linux": r"lib%(name)s\.so\.%(ver)s$",
+    "linux2": r"lib%(name)s\.so\.%(ver)s$",
+    "linux-static": r"lib%(name)s\.a$",  # "<platform>-static" keys are selected by find_lib(static=True)
+    "darwin": r"lib%(name)s\.%(ver)s\.dylib$",
+    "win32": r"%(name)s%(ver)s\.dll$",
+    "win32-static": r"%(name)s\.lib$",
+    "bsd": r"lib%(name)s\.so\.%(ver)s$",  # find_lib maps any platform string containing "bsd" to this key
+}
+
+RE_VER = r"[0-9]*([_\.][0-9]+)*"  # version fragment: optional digits, then any number of `_N` or `.N` groups
+
+
+def find_lib(libname, libdir=None, platform=None, static=False):
+    """Return files in `libdir` whose names match the platform's pattern for `libname`."""
+    key = platform or sys.platform
+    if "bsd" in key:
+        key = "bsd"
+    if static:
+        key = f"{key}-static"
+    template = DLLNAMEMAP.get(key)
+    if template is None:
+        # Undefined platform name (not all platforms define a static pattern): empty result.
+        return []
+    return find_file(re.compile(template % {"name": libname, "ver": RE_VER}), libdir)
+
+
+def find_file(pat, libdir=None):
+    """Return paths of regular files whose names match the compiled regex `pat`.
+
+    `libdir`: None (use get_lib_dirs()), a single str, or an iterable of directories.
+    """
+    if libdir is None:
+        search_dirs = get_lib_dirs()
+    else:
+        search_dirs = [libdir] if isinstance(libdir, str) else list(libdir)
+    matches = []
+    for directory in search_dirs:
+        try:
+            names = os.listdir(directory)
+        except FileNotFoundError:
+            continue  # a directory that does not exist is simply skipped
+        paths = (os.path.join(directory, n) for n in names if pat.match(n))
+        matches.extend(p for p in paths if os.path.isfile(p))
+    return matches
diff --git a/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py
new file mode 100644
index 000000000..692e8e0bc
--- /dev/null
+++ b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py
@@ -0,0 +1,92 @@
+import functools
+import sys
+
+if sys.platform == "win32":
+    import ctypes.wintypes
+
+    import pywintypes
+    import win32api
+
+    # Mirrors WinBase.h (unfortunately not defined already elsewhere)
+    _WINBASE_LOAD_LIBRARY_SEARCH_SYSTEM32 = 0x00000800
+
+else:
+    import ctypes
+    import os
+
+    _LINUX_CDLL_MODE = os.RTLD_NOW | os.RTLD_GLOBAL
+
+from .find_nvidia_dynamic_library import find_nvidia_dynamic_library
+
+
+@functools.cache
+def _windows_cuDriverGetVersion() -> int:
+    """Return the driver version from nvcuda.dll's cuDriverGetVersion; RuntimeError if that fails."""
+    handle = win32api.LoadLibrary("nvcuda.dll")
+    kernel32 = ctypes.WinDLL("kernel32", use_last_error=True)
+    GetProcAddress = kernel32.GetProcAddress
+    GetProcAddress.argtypes = [ctypes.wintypes.HMODULE, ctypes.wintypes.LPCSTR]
+    GetProcAddress.restype = ctypes.c_void_p
+    cuDriverGetVersion = GetProcAddress(handle, b"cuDriverGetVersion")
+    if not cuDriverGetVersion:  # explicit raise: a bare assert is stripped under `python -O`
+        raise RuntimeError("cuDriverGetVersion not found in nvcuda.dll")
+    cuDriverGetVersion_fn = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.POINTER(ctypes.c_int))(cuDriverGetVersion)
+    driver_ver = ctypes.c_int()
+    err = cuDriverGetVersion_fn(ctypes.byref(driver_ver))
+    if err != 0:
+        raise RuntimeError(f"cuDriverGetVersion failed with error code {err}")
+    return driver_ver.value
+
+
+@functools.cache
+def _windows_load_with_dll_basename(name: str) -> "int | None":
+    """Load the known DLL for `name` by basename with win32api.LoadLibrary; return its handle.
+
+    Returns None when `name` has no known DLL basename or when the load fails.
+    """
+    driver_ver = _windows_cuDriverGetVersion()
+    del driver_ver  # Keeping this here because it will probably be needed in the future.
+
+    dll_names = {"nvJitLink": "nvJitLink_120_0.dll", "nvrtc": "nvrtc64_120_0.dll", "nvvm": "nvvm64_40_0.dll"}
+    dll_name = dll_names.get(name)
+    if dll_name is None:
+        # Unknown library: report "not loaded" rather than hitting an unbound `dll_name`.
+        return None
+    try:
+        return win32api.LoadLibrary(dll_name)
+    except pywintypes.error:
+        return None
+
+
+@functools.cache
+def load_nvidia_dynamic_library(name: str) -> int:
+    """Load NVIDIA dynamic library `name` and return its native handle.
+
+    The platform loader search is tried first, then the path reported by
+    find_nvidia_dynamic_library(); RuntimeError is raised if that load fails.
+    """
+    on_windows = sys.platform == "win32"
+    # Stage 1: platform-specific dynamic loader search mechanisms.
+    if on_windows:
+        loaded = _windows_load_with_dll_basename(name)
+        if loaded:
+            return loaded
+    else:
+        try:
+            lib = ctypes.CDLL(f"lib{name}.so", _LINUX_CDLL_MODE)  # version intentionally not specified
+        except OSError:
+            pass
+        else:
+            return lib._handle  # in cython, `<void*><uintptr_t>` converts back to void*
+
+    # Stage 2: load from the explicitly located path.
+    dl_path = find_nvidia_dynamic_library(name)
+    if on_windows:
+        try:
+            return win32api.LoadLibrary(dl_path)  # in cython, `<void*><intptr_t>` converts back to void*
+        except pywintypes.error as e:
+            raise RuntimeError(f"Failed to load DLL at {dl_path}: {e}") from e
+    try:
+        return ctypes.CDLL(dl_path, _LINUX_CDLL_MODE)._handle
+    except OSError as e:
+        raise RuntimeError(f"Failed to dlopen {dl_path}: {e}") from e
diff --git a/cuda_bindings/cuda/bindings/_path_finder/sys_path_find_sub_dirs.py b/cuda_bindings/cuda/bindings/_path_finder/sys_path_find_sub_dirs.py
new file mode 100644
index 000000000..d2da726c9
--- /dev/null
+++ b/cuda_bindings/cuda/bindings/_path_finder/sys_path_find_sub_dirs.py
@@ -0,0 +1,40 @@
+# Copyright 2024-2025 NVIDIA Corporation.  All rights reserved.
+#
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
+
+import functools
+import os
+import sys
+
+
+@functools.cache
+def _impl(sys_path, sub_dirs):
+    """Return existing directories `<base>/<sub_dirs...>` for every base in `sys_path`.
+
+    A "*" component matches any subdirectory; arguments must be hashable since results are cached.
+    """
+    depth = len(sub_dirs)
+    found = []
+    for root in sys_path:
+        pending = [(root, 0)]  # LIFO work list of (path, number of components consumed)
+        while pending:
+            path, level = pending.pop()
+            if level == depth:
+                if os.path.isdir(path):
+                    found.append(path)
+                continue
+            component = sub_dirs[level]
+            if component != "*":
+                children = [component]
+            else:
+                try:
+                    children = sorted(os.listdir(path))
+                except OSError:
+                    continue
+            child_paths = (os.path.join(path, child) for child in children)
+            pending.extend((p, level + 1) for p in child_paths if os.path.isdir(p))
+    return found
+
+
+def sys_path_find_sub_dirs(sub_dirs):  # sub_dirs: path components looked up under each sys.path entry ("*" = any)
+    return list(_impl(tuple(sys.path), tuple(sub_dirs)))  # copy, so callers cannot mutate _impl's cached list
diff --git a/cuda_bindings/cuda/bindings/path_finder.py b/cuda_bindings/cuda/bindings/path_finder.py
new file mode 100644
index 000000000..21aeb4b36
--- /dev/null
+++ b/cuda_bindings/cuda/bindings/path_finder.py
@@ -0,0 +1,37 @@
+# Copyright 2024-2025 NVIDIA Corporation.  All rights reserved.
+#
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
+
+from cuda.bindings._path_finder.cuda_paths import (
+    get_conda_ctk,
+    get_conda_include_dir,
+    get_cuda_home,
+    get_cuda_paths,
+    get_current_cuda_target_name,
+    get_debian_pkg_libdevice,
+    get_libdevice_wheel,
+    get_nvidia_cudalib_ctk,
+    get_nvidia_libdevice_ctk,
+    get_nvidia_nvvm_ctk,
+    get_nvidia_static_cudalib_ctk,
+    get_system_ctk,
+)
+from cuda.bindings._path_finder.find_nvidia_dynamic_library import find_nvidia_dynamic_library
+from cuda.bindings._path_finder.load_nvidia_dynamic_library import load_nvidia_dynamic_library
+
+__all__ = [  # public API of this module: the names imported above from cuda.bindings._path_finder
+    "find_nvidia_dynamic_library",
+    "load_nvidia_dynamic_library",
+    "get_conda_ctk",
+    "get_conda_include_dir",
+    "get_cuda_home",
+    "get_cuda_paths",
+    "get_current_cuda_target_name",
+    "get_debian_pkg_libdevice",
+    "get_libdevice_wheel",
+    "get_nvidia_cudalib_ctk",
+    "get_nvidia_libdevice_ctk",
+    "get_nvidia_nvvm_ctk",
+    "get_nvidia_static_cudalib_ctk",
+    "get_system_ctk",
+]
diff --git a/cuda_bindings/tests/path_finder.py b/cuda_bindings/tests/path_finder.py
new file mode 100644
index 000000000..e9245a5be
--- /dev/null
+++ b/cuda_bindings/tests/path_finder.py
@@ -0,0 +1,9 @@
+from cuda.bindings import path_finder
+
+paths = path_finder.get_cuda_paths()  # printed below, one "key: value" line per entry
+
+for key, value in paths.items():
+    print(f"{key}: {value}", flush=True)
+
+for libname in ("nvvm", "nvJitLink"):  # print what the finder returns for each library
+    print(path_finder.find_nvidia_dynamic_library(libname))
diff --git a/cuda_bindings/tests/test_sys_path_find_sub_dirs.py b/cuda_bindings/tests/test_sys_path_find_sub_dirs.py
new file mode 100644
index 000000000..3297ce39e
--- /dev/null
+++ b/cuda_bindings/tests/test_sys_path_find_sub_dirs.py
@@ -0,0 +1,72 @@
+import os
+
+import pytest
+
+from cuda.bindings._path_finder.sys_path_find_sub_dirs import _impl
+
+
+@pytest.fixture
+def test_tree(tmp_path):
+    """Create a small directory tree and return its simulated sys.path and base.
+
+    Layout under tmp_path:
+      sys1/nvidia/foo/lib
+      sys1/nvidia/bar/lib
+      sys2/nvidia/baz/nvvm/lib64
+    """
+    leaf_dirs = (
+        ("sys1", "nvidia", "foo", "lib"),
+        ("sys1", "nvidia", "bar", "lib"),
+        ("sys2", "nvidia", "baz", "nvvm", "lib64"),
+    )
+    for parts in leaf_dirs:
+        tmp_path.joinpath(*parts).mkdir(parents=True)
+    # "nonexistent" is never created, so lookups should ignore it.
+    roots = ("sys1", "sys2", "nonexistent")
+    sys_path = tuple(str(tmp_path / root) for root in roots)
+    return {"sys_path": sys_path, "base": tmp_path}
+
+
+def test_exact_match(test_tree):
+    """A sub_dirs tuple without wildcards yields only the one matching directory."""
+    wanted = ("nvidia", "foo", "lib")
+    only_hit = test_tree["base"].joinpath("sys1", *wanted)
+    found = _impl(test_tree["sys_path"], wanted)
+    assert found == [str(only_hit)]
+
+
+def test_single_wildcard(test_tree):
+    """A "*" component matches every directory under nvidia that contains `lib`."""
+    nvidia_dir = test_tree["base"] / "sys1" / "nvidia"
+    found = _impl(test_tree["sys_path"], ("nvidia", "*", "lib"))
+    wanted = [str(nvidia_dir / pkg / "lib") for pkg in ("bar", "foo")]
+    # Compare order-insensitively: only the set of hits is checked here.
+    found_sorted = sorted(found)
+    wanted_sorted = sorted(wanted)
+    assert found_sorted == wanted_sorted
+
+
+def test_double_wildcard(test_tree):
+    """A "*" followed by two fixed components matches only sys2's nvvm/lib64 directory."""
+    tail = ("nvvm", "lib64")
+    found = _impl(test_tree["sys_path"], ("nvidia", "*", *tail))
+    wanted = test_tree["base"].joinpath("sys2", "nvidia", "baz", *tail)
+    assert found == [str(wanted)]
+
+
+def test_no_match(test_tree):
+    """A component that exists nowhere in the tree produces an empty result."""
+    missing = ("nvidia", "nonexistent", "lib")
+    assert _impl(test_tree["sys_path"], missing) == []
+
+
+def test_empty_sys_path():
+    """With no sys.path entries there is nothing to search."""
+    assert _impl((), ("nvidia", "*", "lib")) == []
+
+
+def test_empty_sub_dirs(test_tree):
+    """Empty sub_dirs returns the sys.path entries that are existing directories."""
+    bases = test_tree["sys_path"]
+    existing = sorted(filter(os.path.isdir, bases))
+    assert sorted(_impl(bases, ())) == existing