Skip to content

[Backport] Fix support for NVVM from conda on Windows + other fixes #574

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 37 additions & 31 deletions cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in
Original file line number Diff line number Diff line change
Expand Up @@ -55,54 +55,60 @@ cdef int cuPythonInit() except -1 nogil:
except:
handle = None

# Else try default search
if not handle:
LOAD_LIBRARY_SAFE_CURRENT_DIRS = 0x00002000
try:
handle = win32api.LoadLibraryEx("nvrtc64_112_0.dll", 0, LOAD_LIBRARY_SAFE_CURRENT_DIRS)
except:
try:
handle = win32api.LoadLibraryEx("nvrtc64_111_0.dll", 0, LOAD_LIBRARY_SAFE_CURRENT_DIRS)
except:
try:
handle = win32api.LoadLibraryEx("nvrtc64_110_0.dll", 0, LOAD_LIBRARY_SAFE_CURRENT_DIRS)
except:
pass

# Final check if DLLs can be found within pip installations
# Next check if DLLs can be found within pip installations
if not handle:
LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000
LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100
site_packages = [site.getusersitepackages()] + site.getsitepackages()
for sp in site_packages:
mod_path = os.path.join(sp, "nvidia", "cuda_nvrtc", "bin")
if not os.path.isdir(mod_path):
continue
os.add_dll_directory(mod_path)
LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000
LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100
try:
handle = win32api.LoadLibraryEx(
# Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path...
os.path.join(mod_path, "nvrtc64_112_0.dll"),
0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR)

# Note: nvrtc64_120_0.dll calls into nvrtc-builtins64_*.dll which is
# located in the same mod_path.
# Update PATH environ so that the two dlls can find each other
os.environ["PATH"] = os.pathsep.join((os.environ.get("PATH", ""), mod_path))
except:
try:
handle = win32api.LoadLibraryEx(
os.path.join(mod_path, "nvrtc64_111_0.dll"),
# Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path...
os.path.join(mod_path, "nvrtc64_112_0.dll"),
0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR)

# Note: nvrtc64_120_0.dll calls into nvrtc-builtins64_*.dll which is
# located in the same mod_path.
# Update PATH environ so that the two dlls can find each other
os.environ["PATH"] = os.pathsep.join((os.environ.get("PATH", ""), mod_path))
except:
try:
handle = win32api.LoadLibraryEx(
os.path.join(mod_path, "nvrtc64_110_0.dll"),
os.path.join(mod_path, "nvrtc64_111_0.dll"),
0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR)
os.environ["PATH"] = os.pathsep.join((os.environ.get("PATH", ""), mod_path))
except:
pass
try:
handle = win32api.LoadLibraryEx(
os.path.join(mod_path, "nvrtc64_110_0.dll"),
0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR)
os.environ["PATH"] = os.pathsep.join((os.environ.get("PATH", ""), mod_path))
except:
pass
else:
break
else:
break
else:
break
else:
# Else try default search
# Only reached if DLL wasn't found in any site-package path
LOAD_LIBRARY_SAFE_CURRENT_DIRS = 0x00002000
try:
handle = win32api.LoadLibraryEx("nvrtc64_112_0.dll", 0, LOAD_LIBRARY_SAFE_CURRENT_DIRS)
except:
try:
handle = win32api.LoadLibraryEx("nvrtc64_111_0.dll", 0, LOAD_LIBRARY_SAFE_CURRENT_DIRS)
except:
try:
handle = win32api.LoadLibraryEx("nvrtc64_110_0.dll", 0, LOAD_LIBRARY_SAFE_CURRENT_DIRS)
except:
pass

if not handle:
raise RuntimeError('Failed to LoadLibraryEx nvrtc64_112_0.dll, or nvrtc64_111_0.dll, or nvrtc64_110_0.dll')
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm asking mainly from the perspective of working on the path_finder code: Do I need to look around more?

Concretely, what distributions are there with nvrtc64_111_0.dll or nvrtc64_110_0.dll?

I ran unzip -l for all cuda_nvrtc-windows-x86_64*.zip that I found under

https://developer.download.nvidia.com/compute/cuda/redist/

For CTK 11.x, I only see nvrtc64_112_0.dll (see below).

I also looked in the nvidia_cuda_nvrtc_cu11 wheel; it has nvrtc64_112_0.dll, too.

$ grep nvrtc64_......dll *.txt
all_windows-x86_64_unzip_l.txt: 32601600  2021-08-16 03:08   cuda_nvrtc-windows-x86_64-11.4.120-archive/lib/nvrtc64_112_0.dll
all_windows-x86_64_unzip_l.txt: 32604160  2021-10-12 01:29   cuda_nvrtc-windows-x86_64-11.4.152-archive/lib/nvrtc64_112_0.dll
all_windows-x86_64_unzip_l.txt: 32961024  2021-11-18 12:45   cuda_nvrtc-windows-x86_64-11.5.119-archive/lib/nvrtc64_112_0.dll
all_windows-x86_64_unzip_l.txt: 32949248  2021-09-14 01:05   cuda_nvrtc-windows-x86_64-11.5.50-archive/lib/nvrtc64_112_0.dll
all_windows-x86_64_unzip_l.txt: 33174528  2022-02-10 19:20   cuda_nvrtc-windows-x86_64-11.6.112-archive/bin/nvrtc64_112_0.dll
all_windows-x86_64_unzip_l.txt: 33175552  2022-03-08 18:51   cuda_nvrtc-windows-x86_64-11.6.124-archive/bin/nvrtc64_112_0.dll
all_windows-x86_64_unzip_l.txt: 33174528  2021-12-17 19:06   cuda_nvrtc-windows-x86_64-11.6.55-archive/lib/nvrtc64_112_0.dll
all_windows-x86_64_unzip_l.txt: 33908224  2022-04-05 19:49   cuda_nvrtc-windows-x86_64-11.7.50-archive/bin/nvrtc64_112_0.dll
all_windows-x86_64_unzip_l.txt: 33909248  2022-06-08 17:38   cuda_nvrtc-windows-x86_64-11.7.99-archive/bin/nvrtc64_112_0.dll
all_windows-x86_64_unzip_l.txt: 40629248  2022-09-21 11:15   cuda_nvrtc-windows-x86_64-11.8.89-archive/bin/nvrtc64_112_0.dll
all_windows-x86_64_unzip_l.txt: 41392128  2023-01-06 22:09   cuda_nvrtc-windows-x86_64-12.0.140-archive/bin/nvrtc64_120_0.dll
all_windows-x86_64_unzip_l.txt: 41385984  2022-10-24 20:20   cuda_nvrtc-windows-x86_64-12.0.76-archive/bin/nvrtc64_120_0.dll
all_windows-x86_64_unzip_l.txt: 42161152  2023-04-03 18:07   cuda_nvrtc-windows-x86_64-12.1.105-archive/bin/nvrtc64_120_0.dll
all_windows-x86_64_unzip_l.txt: 42159104  2023-01-24 20:29   cuda_nvrtc-windows-x86_64-12.1.55-archive/bin/nvrtc64_120_0.dll
all_windows-x86_64_unzip_l.txt: 42494976  2023-07-11 04:05   cuda_nvrtc-windows-x86_64-12.2.128-archive/bin/nvrtc64_120_0.dll
all_windows-x86_64_unzip_l.txt: 42521600  2023-08-15 22:34   cuda_nvrtc-windows-x86_64-12.2.140-archive/bin/nvrtc64_120_0.dll
all_windows-x86_64_unzip_l.txt: 42499584  2023-06-13 20:23   cuda_nvrtc-windows-x86_64-12.2.91-archive/bin/nvrtc64_120_0.dll
all_windows-x86_64_unzip_l.txt: 45593088  2023-11-03 18:01   cuda_nvrtc-windows-x86_64-12.3.103-archive/bin/nvrtc64_120_0.dll
all_windows-x86_64_unzip_l.txt: 45593088  2023-11-22 11:19   cuda_nvrtc-windows-x86_64-12.3.107-archive/bin/nvrtc64_120_0.dll
all_windows-x86_64_unzip_l.txt: 45592064  2023-09-08 20:10   cuda_nvrtc-windows-x86_64-12.3.52-archive/bin/nvrtc64_120_0.dll
all_windows-x86_64_unzip_l.txt: 44738048  2024-03-15 15:46   cuda_nvrtc-windows-x86_64-12.4.127-archive/bin/nvrtc64_120_0.dll
all_windows-x86_64_unzip_l.txt: 44733952  2024-02-27 18:05   cuda_nvrtc-windows-x86_64-12.4.99-archive/bin/nvrtc64_120_0.dll
all_windows-x86_64_unzip_l.txt: 45325312  2024-04-17 19:59   cuda_nvrtc-windows-x86_64-12.5.40-archive/bin/nvrtc64_120_0.dll
all_windows-x86_64_unzip_l.txt: 45325312  2024-06-06 03:48   cuda_nvrtc-windows-x86_64-12.5.82-archive/bin/nvrtc64_120_0.dll
all_windows-x86_64_unzip_l.txt: 45474304  2024-06-14 17:19   cuda_nvrtc-windows-x86_64-12.6.20-archive/bin/nvrtc64_120_0.dll
all_windows-x86_64_unzip_l.txt: 45475328  2024-08-14 11:11   cuda_nvrtc-windows-x86_64-12.6.68-archive/bin/nvrtc64_120_0.dll
all_windows-x86_64_unzip_l.txt: 45865984  2024-09-12 03:24   cuda_nvrtc-windows-x86_64-12.6.77-archive/bin/nvrtc64_120_0.dll
all_windows-x86_64_unzip_l.txt: 45865984  2024-10-30 02:04   cuda_nvrtc-windows-x86_64-12.6.85-archive/bin/nvrtc64_120_0.dll
all_windows-x86_64_unzip_l.txt: 86702592  2025-01-15 20:14   cuda_nvrtc-windows-x86_64-12.8.61-archive/bin/nvrtc64_120_0.dll
all_windows-x86_64_unzip_l.txt: 86728192  2025-02-21 21:13   cuda_nvrtc-windows-x86_64-12.8.93-archive/bin/nvrtc64_120_0.dll

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So this is what I know (I actually learned it from @kkraus14 a long time ago) 🙂 CUDA has promised minor version compatibility since CUDA 11; however, back then the compiler was not stable enough to fulfill this promise until 11.2. So before that, each minor release had a different SONAME (this is the same on Linux too), and in reality minor version compatibility has been supported only since 11.2. Technically we should just drop support for 11.0/11.1 (they are untested anyway), but Vlad has kept the old code around for a long time and nobody has complained. The story will become a lot cleaner once we drop 11.x support.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To close the loop here:

With 38a1d6c the cuda.bindings.path_finder now has this for nvrtc:

    "nvrtc": (
        "nvrtc64_110_0.dll",
        "nvrtc64_111_0.dll",
        "nvrtc64_112_0.dll",
        "nvrtc64_120_0.dll",
    ),

This is the result of downloading all released cuda_*win*.exe files for 11.x and 12.x, and then inspecting the .dll files in all of them automatically (toolshed/build_path_finder_dlls.py).

I believe with this general/systematic approach we can be certain that we will not miss any "already loaded" DLLs.

Expand Down
50 changes: 24 additions & 26 deletions cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ cdef void* __nvvmGetProgramLog = NULL


cdef inline list get_site_packages():
return [site.getusersitepackages()] + site.getsitepackages()
return [site.getusersitepackages()] + site.getsitepackages() + ["conda"]


cdef load_library(const int driver_ver):
Expand All @@ -50,44 +50,42 @@ cdef load_library(const int driver_ver):
for suffix in get_nvvm_dso_version_suffix(driver_ver):
if len(suffix) == 0:
continue
dll_name = "nvvm64_40_0"
dll_name = "nvvm64_40_0.dll"

# First check if the DLL has been loaded by 3rd parties
try:
handle = win32api.GetModuleHandle(dll_name)
return win32api.GetModuleHandle(dll_name)
except:
pass
else:
break

# Next, check if DLLs are installed via pip
# Next, check if DLLs are installed via pip or conda
for sp in get_site_packages():
mod_path = os.path.join(sp, "nvidia", "cuda_nvcc", "nvvm", "bin")
if not os.path.isdir(mod_path):
continue
os.add_dll_directory(mod_path)
try:
handle = win32api.LoadLibraryEx(
# Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path...
os.path.join(mod_path, dll_name),
0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR)
except:
pass
else:
break
if sp == "conda":
# nvvm is not under $CONDA_PREFIX/lib, so it's not in the default search path
conda_prefix = os.environ.get("CONDA_PREFIX")
if conda_prefix is None:
continue
mod_path = os.path.join(conda_prefix, "Library", "nvvm", "bin")
else:
mod_path = os.path.join(sp, "nvidia", "cuda_nvcc", "nvvm", "bin")
if os.path.isdir(mod_path):
os.add_dll_directory(mod_path)
try:
return win32api.LoadLibraryEx(
# Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path...
os.path.join(mod_path, dll_name),
0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR)
except:
pass

# Finally, try default search
# Only reached if DLL wasn't found in any site-package path
try:
handle = win32api.LoadLibrary(dll_name)
return win32api.LoadLibrary(dll_name)
except:
pass
else:
break
else:
raise RuntimeError('Failed to load nvvm')

assert handle != 0
return handle
raise RuntimeError('Failed to load nvvm')


cdef int _check_or_init_nvvm() except -1 nogil:
Expand Down
2 changes: 1 addition & 1 deletion cuda_bindings/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ def build_extension(self, ext):
# to <loc>/site-packages/nvidia/cuda_nvcc/nvvm/lib64/
rel1 = "$ORIGIN/../../../nvidia/cuda_nvcc/nvvm/lib64"
# from <loc>/lib/python3.*/site-packages/cuda/bindings/_internal/
# to <loc>/lib/nvvm/lib64/
# to <loc>/nvvm/lib64/
rel2 = "$ORIGIN/../../../../../../nvvm/lib64"
ldflag = f"-Wl,--disable-new-dtags,-rpath,{rel1},-rpath,{rel2}"
else:
Expand Down
Loading