From c6e0a02adeb13616f27eef6ad4729844bc55d37e Mon Sep 17 00:00:00 2001 From: ksimpson Date: Thu, 10 Oct 2024 13:39:46 -0700 Subject: [PATCH 01/34] naive approach to adding bindings --- cuda/cuda/bindings/_bindings/nvJitLink.pxd | 26 ++ .../bindings/_bindings/nvJitLink_linux.pyx | 382 +++++++++++++++++ .../bindings/_bindings/nvJitLink_windows.pyx | 393 ++++++++++++++++++ cuda/cuda/bindings/cynvJitLink.pxd | 48 +++ cuda/cuda/bindings/cynvJitLink.pyx | 63 +++ cuda/cuda/bindings/nvJitLink.pxd | 46 ++ cuda/cuda/bindings/nvJitLink.pyx | 138 ++++++ cuda/cuda/bindings/tests/test_nvJitLink.py | 3 + cuda/setup.py | 3 +- 9 files changed, 1101 insertions(+), 1 deletion(-) create mode 100644 cuda/cuda/bindings/_bindings/nvJitLink.pxd create mode 100644 cuda/cuda/bindings/_bindings/nvJitLink_linux.pyx create mode 100644 cuda/cuda/bindings/_bindings/nvJitLink_windows.pyx create mode 100644 cuda/cuda/bindings/cynvJitLink.pxd create mode 100644 cuda/cuda/bindings/cynvJitLink.pyx create mode 100644 cuda/cuda/bindings/nvJitLink.pxd create mode 100644 cuda/cuda/bindings/nvJitLink.pyx create mode 100644 cuda/cuda/bindings/tests/test_nvJitLink.py diff --git a/cuda/cuda/bindings/_bindings/nvJitLink.pxd b/cuda/cuda/bindings/_bindings/nvJitLink.pxd new file mode 100644 index 000000000..dca128a0e --- /dev/null +++ b/cuda/cuda/bindings/_bindings/nvJitLink.pxd @@ -0,0 +1,26 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: Apache-2.0 +# +# This code was automatically generated across versions from 12.0.1 to 12.4.1. Do not modify it directly. 
+
+from ..cynvJitLink cimport *
+
+
+###############################################################################
+# Wrapper functions (declarations only; bodies are in nvJitLink_linux.pyx / nvJitLink_windows.pyx)
+###############################################################################
+
+cdef nvJitLinkResult _nvJitLinkCreate(nvJitLinkHandle* handle, uint32_t numOptions, const char** options) except* nogil
+cdef nvJitLinkResult _nvJitLinkDestroy(nvJitLinkHandle* handle) except* nogil
+cdef nvJitLinkResult _nvJitLinkAddData(nvJitLinkHandle handle, nvJitLinkInputType inputType, const void* data, size_t size, const char* name) except* nogil
+cdef nvJitLinkResult _nvJitLinkAddFile(nvJitLinkHandle handle, nvJitLinkInputType inputType, const char* fileName) except* nogil
+cdef nvJitLinkResult _nvJitLinkComplete(nvJitLinkHandle handle) except* nogil
+cdef nvJitLinkResult _nvJitLinkGetLinkedCubinSize(nvJitLinkHandle handle, size_t* size) except* nogil
+cdef nvJitLinkResult _nvJitLinkGetLinkedCubin(nvJitLinkHandle handle, void* cubin) except* nogil
+cdef nvJitLinkResult _nvJitLinkGetLinkedPtxSize(nvJitLinkHandle handle, size_t* size) except* nogil
+cdef nvJitLinkResult _nvJitLinkGetLinkedPtx(nvJitLinkHandle handle, char* ptx) except* nogil
+cdef nvJitLinkResult _nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t* size) except* nogil
+cdef nvJitLinkResult _nvJitLinkGetErrorLog(nvJitLinkHandle handle, char* log) except* nogil
+cdef nvJitLinkResult _nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t* size) except* nogil
+cdef nvJitLinkResult _nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) except* nogil
diff --git a/cuda/cuda/bindings/_bindings/nvJitLink_linux.pyx b/cuda/cuda/bindings/_bindings/nvJitLink_linux.pyx
new file mode 100644
index 000000000..2fc6ca625
--- /dev/null
+++ b/cuda/cuda/bindings/_bindings/nvJitLink_linux.pyx
@@ -0,0 +1,382 @@
+# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# This code was automatically generated across versions from 12.0.1 to 12.4.1. Do not modify it directly.
+
+from libc.stdint cimport intptr_t
+
+from .utils cimport get_nvJitLink_dso_version_suffix
+
+from .utils import FunctionNotFoundError, NotSupportedError
+
+
+###############################################################################
+# Extern
+###############################################################################
+
+cdef extern from "<dlfcn.h>" nogil:
+    void* dlopen(const char*, int)
+    char* dlerror()
+    void* dlsym(void*, const char*)
+    int dlclose(void*)
+
+    enum:
+        RTLD_LAZY
+        RTLD_NOW
+        RTLD_GLOBAL
+        RTLD_LOCAL
+
+    const void* RTLD_DEFAULT 'RTLD_DEFAULT'
+
+
+###############################################################################
+# Wrapper init
+###############################################################################
+
+cdef bint __py_nvJitLink_init = False  # set once every symbol lookup below has been attempted
+cdef void* __cuDriverGetVersion = NULL
+
+cdef void* __nvJitLinkCreate = NULL
+cdef void* __nvJitLinkDestroy = NULL
+cdef void* __nvJitLinkAddData = NULL
+cdef void* __nvJitLinkAddFile = NULL
+cdef void* __nvJitLinkComplete = NULL
+cdef void* __nvJitLinkGetLinkedCubinSize = NULL
+cdef void* __nvJitLinkGetLinkedCubin = NULL
+cdef void* __nvJitLinkGetLinkedPtxSize = NULL
+cdef void* __nvJitLinkGetLinkedPtx = NULL
+cdef void* __nvJitLinkGetErrorLogSize = NULL
+cdef void* __nvJitLinkGetErrorLog = NULL
+cdef void* __nvJitLinkGetInfoLogSize = NULL
+cdef void* __nvJitLinkGetInfoLog = NULL
+
+
+cdef void* load_library(const int driver_ver) except* with gil:  # dlopen the first libnvJitLink.so[.suffix] that loads
+    cdef void* handle
+    for suffix in get_nvJitLink_dso_version_suffix(driver_ver):
+        so_name = "libnvJitLink.so" + (f".{suffix}" if suffix else suffix)
+        handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL)
+        if handle != NULL:
+            break
+    else:  # no candidate name could be opened
+        err_msg = dlerror()
+        raise RuntimeError(f'Failed to dlopen libnvJitLink ({err_msg.decode()})')
+    return handle
+
+
+cdef int _check_or_init_nvJitLink() except -1 nogil:  # one-time symbol resolution; returns 0, raises on driver failure
+    global __py_nvJitLink_init
+    if __py_nvJitLink_init:
+        return 0
+
+    # Load driver to check version
+    cdef void* handle = NULL
+    handle = dlopen('libcuda.so.1', RTLD_NOW | RTLD_GLOBAL)
+    if handle == NULL:
+        with gil:
+            err_msg = dlerror()
+            raise NotSupportedError(f'CUDA driver is not found ({err_msg.decode()})')
+    global __cuDriverGetVersion
+    if __cuDriverGetVersion == NULL:
+        __cuDriverGetVersion = dlsym(handle, "cuDriverGetVersion")
+    if __cuDriverGetVersion == NULL:
+        with gil:
+            raise RuntimeError('cuDriverGetVersion is not found in libcuda.so.1')
+    cdef int err, driver_ver
+    err = (<int (*)(int*) nogil>__cuDriverGetVersion)(&driver_ver)  # a void* must be cast before it can be called
+    if err != 0:
+        with gil:
+            raise RuntimeError(f'cuDriverGetVersion failed with error code {err}')
+    #dlclose(handle)
+    handle = NULL  # driver handle is deliberately kept open; reset so load_library() runs on the first missing symbol
+
+    # Load function
+    global __nvJitLinkCreate
+    __nvJitLinkCreate = dlsym(RTLD_DEFAULT, 'nvJitLinkCreate')  # prefer a copy already loaded into the process
+    if __nvJitLinkCreate == NULL:
+        if handle == NULL:
+            handle = load_library(driver_ver)
+        __nvJitLinkCreate = dlsym(handle, 'nvJitLinkCreate')  # may stay NULL; the wrapper reports it on first use
+
+    global __nvJitLinkDestroy
+    __nvJitLinkDestroy = dlsym(RTLD_DEFAULT, 'nvJitLinkDestroy')
+    if __nvJitLinkDestroy == NULL:
+        if handle == NULL:
+            handle = load_library(driver_ver)
+        __nvJitLinkDestroy = dlsym(handle, 'nvJitLinkDestroy')
+
+    global __nvJitLinkAddData
+    __nvJitLinkAddData = dlsym(RTLD_DEFAULT, 'nvJitLinkAddData')
+    if __nvJitLinkAddData == NULL:
+        if handle == NULL:
+            handle = load_library(driver_ver)
+        __nvJitLinkAddData = dlsym(handle, 'nvJitLinkAddData')
+
+    global __nvJitLinkAddFile
+    __nvJitLinkAddFile = dlsym(RTLD_DEFAULT, 'nvJitLinkAddFile')
+    if __nvJitLinkAddFile == NULL:
+        if handle == NULL:
+            handle = load_library(driver_ver)
+        __nvJitLinkAddFile = dlsym(handle, 'nvJitLinkAddFile')
+
+    global __nvJitLinkComplete
+    __nvJitLinkComplete = dlsym(RTLD_DEFAULT, 'nvJitLinkComplete')
+    if __nvJitLinkComplete == NULL:
+        if handle == NULL:
+            handle = load_library(driver_ver)
+        __nvJitLinkComplete = dlsym(handle, 'nvJitLinkComplete')
+
+    global __nvJitLinkGetLinkedCubinSize
+    __nvJitLinkGetLinkedCubinSize = dlsym(RTLD_DEFAULT, 'nvJitLinkGetLinkedCubinSize')
+    if __nvJitLinkGetLinkedCubinSize == NULL:
+        if handle == NULL:
+            handle = load_library(driver_ver)
+        __nvJitLinkGetLinkedCubinSize = dlsym(handle, 'nvJitLinkGetLinkedCubinSize')
+
+    global __nvJitLinkGetLinkedCubin
+    __nvJitLinkGetLinkedCubin = dlsym(RTLD_DEFAULT, 'nvJitLinkGetLinkedCubin')
+    if __nvJitLinkGetLinkedCubin == NULL:
+        if handle == NULL:
+            handle = load_library(driver_ver)
+        __nvJitLinkGetLinkedCubin = dlsym(handle, 'nvJitLinkGetLinkedCubin')
+
+    global __nvJitLinkGetLinkedPtxSize
+    __nvJitLinkGetLinkedPtxSize = dlsym(RTLD_DEFAULT, 'nvJitLinkGetLinkedPtxSize')
+    if __nvJitLinkGetLinkedPtxSize == NULL:
+        if handle == NULL:
+            handle = load_library(driver_ver)
+        __nvJitLinkGetLinkedPtxSize = dlsym(handle, 'nvJitLinkGetLinkedPtxSize')
+
+    global __nvJitLinkGetLinkedPtx
+    __nvJitLinkGetLinkedPtx = dlsym(RTLD_DEFAULT, 'nvJitLinkGetLinkedPtx')
+    if __nvJitLinkGetLinkedPtx == NULL:
+        if handle == NULL:
+            handle = load_library(driver_ver)
+        __nvJitLinkGetLinkedPtx = dlsym(handle, 'nvJitLinkGetLinkedPtx')
+
+    global __nvJitLinkGetErrorLogSize
+    __nvJitLinkGetErrorLogSize = dlsym(RTLD_DEFAULT, 'nvJitLinkGetErrorLogSize')
+    if __nvJitLinkGetErrorLogSize == NULL:
+        if handle == NULL:
+            handle = load_library(driver_ver)
+        __nvJitLinkGetErrorLogSize = dlsym(handle, 'nvJitLinkGetErrorLogSize')
+
+    global __nvJitLinkGetErrorLog
+    __nvJitLinkGetErrorLog = dlsym(RTLD_DEFAULT, 'nvJitLinkGetErrorLog')
+    if __nvJitLinkGetErrorLog == NULL:
+        if handle == NULL:
+            handle = load_library(driver_ver)
+        __nvJitLinkGetErrorLog = dlsym(handle, 'nvJitLinkGetErrorLog')
+
+    global __nvJitLinkGetInfoLogSize
+    __nvJitLinkGetInfoLogSize = dlsym(RTLD_DEFAULT, 'nvJitLinkGetInfoLogSize')
+    if __nvJitLinkGetInfoLogSize == NULL:
+        if handle == NULL:
+            handle = load_library(driver_ver)
+        __nvJitLinkGetInfoLogSize = dlsym(handle, 'nvJitLinkGetInfoLogSize')
+
+    global __nvJitLinkGetInfoLog
+    __nvJitLinkGetInfoLog = dlsym(RTLD_DEFAULT, 'nvJitLinkGetInfoLog')
+    if __nvJitLinkGetInfoLog == NULL:
+        if handle == NULL:
+            handle = load_library(driver_ver)
+        __nvJitLinkGetInfoLog = dlsym(handle, 'nvJitLinkGetInfoLog')
+
+    __py_nvJitLink_init = True
+    return 0
+
+
+cdef dict func_ptrs = None  # cache for _inspect_function_pointers()
+
+
+cpdef dict _inspect_function_pointers():  # map "__nvJitLinkXxx" -> resolved address as an integer (0 when not found)
+    global func_ptrs
+    if func_ptrs is not None:
+        return func_ptrs
+
+    _check_or_init_nvJitLink()
+    cdef dict data = {}
+
+    global __nvJitLinkCreate
+    data["__nvJitLinkCreate"] = <intptr_t>__nvJitLinkCreate  # void* cannot be stored in a dict; expose it as an integer
+
+    global __nvJitLinkDestroy
+    data["__nvJitLinkDestroy"] = <intptr_t>__nvJitLinkDestroy
+
+    global __nvJitLinkAddData
+    data["__nvJitLinkAddData"] = <intptr_t>__nvJitLinkAddData
+
+    global __nvJitLinkAddFile
+    data["__nvJitLinkAddFile"] = <intptr_t>__nvJitLinkAddFile
+
+    global __nvJitLinkComplete
+    data["__nvJitLinkComplete"] = <intptr_t>__nvJitLinkComplete
+
+    global __nvJitLinkGetLinkedCubinSize
+    data["__nvJitLinkGetLinkedCubinSize"] = <intptr_t>__nvJitLinkGetLinkedCubinSize
+
+    global __nvJitLinkGetLinkedCubin
+    data["__nvJitLinkGetLinkedCubin"] = <intptr_t>__nvJitLinkGetLinkedCubin
+
+    global __nvJitLinkGetLinkedPtxSize
+    data["__nvJitLinkGetLinkedPtxSize"] = <intptr_t>__nvJitLinkGetLinkedPtxSize
+
+    global __nvJitLinkGetLinkedPtx
+    data["__nvJitLinkGetLinkedPtx"] = <intptr_t>__nvJitLinkGetLinkedPtx
+
+    global __nvJitLinkGetErrorLogSize
+    data["__nvJitLinkGetErrorLogSize"] = <intptr_t>__nvJitLinkGetErrorLogSize
+
+    global __nvJitLinkGetErrorLog
+    data["__nvJitLinkGetErrorLog"] = <intptr_t>__nvJitLinkGetErrorLog
+
+    global __nvJitLinkGetInfoLogSize
+    data["__nvJitLinkGetInfoLogSize"] = <intptr_t>__nvJitLinkGetInfoLogSize
+
+    global __nvJitLinkGetInfoLog
+    data["__nvJitLinkGetInfoLog"] = <intptr_t>__nvJitLinkGetInfoLog
+
+    func_ptrs = data
+    return data
+
+
+cpdef _inspect_function_pointer(str name):  # single-entry lookup; raises KeyError for an unknown name
+    global func_ptrs
+    if func_ptrs is None:
+        func_ptrs = _inspect_function_pointers()
+    return func_ptrs[name]
+
+
+###############################################################################
+# Wrapper functions
+###############################################################################
+
+cdef nvJitLinkResult _nvJitLinkCreate(nvJitLinkHandle* handle, uint32_t numOptions, const char** options) except* nogil:
+    global __nvJitLinkCreate
+    _check_or_init_nvJitLink()  # resolves all symbols on first use; cheap flag check afterwards
+    if __nvJitLinkCreate == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkCreate is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle*, uint32_t, const char**) nogil>__nvJitLinkCreate)(
+        handle, numOptions, options)
+
+
+cdef nvJitLinkResult _nvJitLinkDestroy(nvJitLinkHandle* handle) except* nogil:
+    global __nvJitLinkDestroy
+    _check_or_init_nvJitLink()
+    if __nvJitLinkDestroy == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkDestroy is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle*) nogil>__nvJitLinkDestroy)(
+        handle)
+
+
+cdef nvJitLinkResult _nvJitLinkAddData(nvJitLinkHandle handle, nvJitLinkInputType inputType, const void* data, size_t size, const char* name) except* nogil:
+    global __nvJitLinkAddData
+    _check_or_init_nvJitLink()
+    if __nvJitLinkAddData == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkAddData is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle, nvJitLinkInputType, const void*, size_t, const char*) nogil>__nvJitLinkAddData)(
+        handle, inputType, data, size, name)
+
+
+cdef nvJitLinkResult _nvJitLinkAddFile(nvJitLinkHandle handle, nvJitLinkInputType inputType, const char* fileName) except* nogil:
+    global __nvJitLinkAddFile
+    _check_or_init_nvJitLink()
+    if __nvJitLinkAddFile == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkAddFile is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle, nvJitLinkInputType, const char*) nogil>__nvJitLinkAddFile)(
+        handle, inputType, fileName)
+
+
+cdef nvJitLinkResult _nvJitLinkComplete(nvJitLinkHandle handle) except* nogil:
+    global __nvJitLinkComplete
+    _check_or_init_nvJitLink()
+    if __nvJitLinkComplete == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkComplete is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle) nogil>__nvJitLinkComplete)(
+        handle)
+
+
+cdef nvJitLinkResult _nvJitLinkGetLinkedCubinSize(nvJitLinkHandle handle, size_t* size) except* nogil:
+    global __nvJitLinkGetLinkedCubinSize
+    _check_or_init_nvJitLink()
+    if __nvJitLinkGetLinkedCubinSize == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkGetLinkedCubinSize is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle, size_t*) nogil>__nvJitLinkGetLinkedCubinSize)(
+        handle, size)
+
+
+cdef nvJitLinkResult _nvJitLinkGetLinkedCubin(nvJitLinkHandle handle, void* cubin) except* nogil:
+    global __nvJitLinkGetLinkedCubin
+    _check_or_init_nvJitLink()
+    if __nvJitLinkGetLinkedCubin == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkGetLinkedCubin is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle, void*) nogil>__nvJitLinkGetLinkedCubin)(
+        handle, cubin)
+
+
+cdef nvJitLinkResult _nvJitLinkGetLinkedPtxSize(nvJitLinkHandle handle, size_t* size) except* nogil:
+    global __nvJitLinkGetLinkedPtxSize
+    _check_or_init_nvJitLink()
+    if __nvJitLinkGetLinkedPtxSize == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkGetLinkedPtxSize is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle, size_t*) nogil>__nvJitLinkGetLinkedPtxSize)(
+        handle, size)
+
+
+cdef nvJitLinkResult _nvJitLinkGetLinkedPtx(nvJitLinkHandle handle, char* ptx) except* nogil:
+    global __nvJitLinkGetLinkedPtx
+    _check_or_init_nvJitLink()
+    if __nvJitLinkGetLinkedPtx == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkGetLinkedPtx is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle, char*) nogil>__nvJitLinkGetLinkedPtx)(
+        handle, ptx)
+
+
+cdef nvJitLinkResult _nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t* size) except* nogil:
+    global __nvJitLinkGetErrorLogSize
+    _check_or_init_nvJitLink()
+    if __nvJitLinkGetErrorLogSize == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkGetErrorLogSize is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle, size_t*) nogil>__nvJitLinkGetErrorLogSize)(
+        handle, size)
+
+
+cdef nvJitLinkResult _nvJitLinkGetErrorLog(nvJitLinkHandle handle, char* log) except* nogil:
+    global __nvJitLinkGetErrorLog
+    _check_or_init_nvJitLink()
+    if __nvJitLinkGetErrorLog == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkGetErrorLog is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle, char*) nogil>__nvJitLinkGetErrorLog)(
+        handle, log)
+
+
+cdef nvJitLinkResult _nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t* size) except* nogil:
+    global __nvJitLinkGetInfoLogSize
+    _check_or_init_nvJitLink()
+    if __nvJitLinkGetInfoLogSize == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkGetInfoLogSize is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle, size_t*) nogil>__nvJitLinkGetInfoLogSize)(
+        handle, size)
+
+
+cdef nvJitLinkResult _nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) except* nogil:
+    global __nvJitLinkGetInfoLog
+    _check_or_init_nvJitLink()
+    if __nvJitLinkGetInfoLog == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkGetInfoLog is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle, char*) nogil>__nvJitLinkGetInfoLog)(
+        handle, log)
diff --git a/cuda/cuda/bindings/_bindings/nvJitLink_windows.pyx b/cuda/cuda/bindings/_bindings/nvJitLink_windows.pyx
new file mode 100644
index 000000000..8856b59ca
--- /dev/null
+++ b/cuda/cuda/bindings/_bindings/nvJitLink_windows.pyx
@@ -0,0 +1,393 @@
+# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# This code was automatically generated across versions from 12.0.1 to 12.4.1. Do not modify it directly.
+
+from libc.stdint cimport intptr_t
+
+from .utils cimport get_nvJitLink_dso_version_suffix
+
+import os
+import site
+
+import win32api
+
+from .utils import FunctionNotFoundError, NotSupportedError
+
+
+###############################################################################
+# Wrapper init
+###############################################################################
+
+LOAD_LIBRARY_SEARCH_SYSTEM32 = 0x00000800
+LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000
+LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100
+cdef bint __py_nvJitLink_init = False  # set once every symbol lookup below has been attempted
+cdef void* __cuDriverGetVersion = NULL
+
+cdef void* __nvJitLinkCreate = NULL
+cdef void* __nvJitLinkDestroy = NULL
+cdef void* __nvJitLinkAddData = NULL
+cdef void* __nvJitLinkAddFile = NULL
+cdef void* __nvJitLinkComplete = NULL
+cdef void* __nvJitLinkGetLinkedCubinSize = NULL
+cdef void* __nvJitLinkGetLinkedCubin = NULL
+cdef void* __nvJitLinkGetLinkedPtxSize = NULL
+cdef void* __nvJitLinkGetLinkedPtx = NULL
+cdef void* __nvJitLinkGetErrorLogSize = NULL
+cdef void* __nvJitLinkGetErrorLog = NULL
+cdef void* __nvJitLinkGetInfoLogSize = NULL
+cdef void* __nvJitLinkGetInfoLog = NULL
+
+
+cdef inline list get_site_packages():
+    return [site.getusersitepackages()] + site.getsitepackages()
+
+
+cdef load_library(const int driver_ver):  # return a module handle for the first nvJitLink DLL that loads
+    handle = 0
+
+    for suffix in get_nvJitLink_dso_version_suffix(driver_ver):
+        if len(suffix) == 0:
+            continue
+        dll_name = f"nvJitLink64_{suffix}.dll"
+
+        # First check if the DLL has been loaded by 3rd parties
+        try:
+            handle = win32api.GetModuleHandle(dll_name)
+        except Exception:  # not loaded yet: fall through to the next strategy
+            pass
+        else:
+            break
+
+        # Next, check if DLLs are installed via pip
+        for sp in get_site_packages():
+            mod_path = os.path.join(sp, "nvidia", "nvJitLink", "bin")
+            if not os.path.isdir(mod_path):
+                continue
+            os.add_dll_directory(mod_path)
+        try:
+            handle = win32api.LoadLibraryEx(
+                # Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path...
+                os.path.join(mod_path, dll_name),
+                0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR)
+        except Exception:  # also covers mod_path being unbound when there are no site-packages entries
+            pass
+        else:
+            break
+
+        # Finally, try default search
+        try:
+            handle = win32api.LoadLibrary(dll_name)
+        except Exception:  # try the next version suffix
+            pass
+        else:
+            break
+    else:
+        raise RuntimeError('Failed to load nvJitLink')
+
+    assert handle != 0
+    return handle
+
+
+cdef int _check_or_init_nvJitLink() except -1 nogil:  # one-time symbol resolution; returns 0, raises on driver failure
+    global __py_nvJitLink_init
+    if __py_nvJitLink_init:
+        return 0
+
+    cdef int err, driver_ver
+    with gil:
+        # Load driver to check version
+        try:
+            handle = win32api.LoadLibraryEx("nvcuda.dll", 0, LOAD_LIBRARY_SEARCH_SYSTEM32)
+        except Exception as e:
+            raise NotSupportedError(f'CUDA driver is not found ({e})')
+        global __cuDriverGetVersion
+        if __cuDriverGetVersion == NULL:
+            __cuDriverGetVersion = <void*><intptr_t>win32api.GetProcAddress(handle, 'cuDriverGetVersion')
+        if __cuDriverGetVersion == NULL:
+            raise RuntimeError('cuDriverGetVersion is not found in nvcuda.dll')
+        err = (<int (*)(int*) nogil>__cuDriverGetVersion)(&driver_ver)  # a void* must be cast before it can be called
+        if err != 0:
+            raise RuntimeError(f'cuDriverGetVersion failed with error code {err}')
+
+        # Load library
+        handle = load_library(driver_ver)
+
+        # Load function
+        global __nvJitLinkCreate
+        try:
+            __nvJitLinkCreate = <void*><intptr_t>win32api.GetProcAddress(handle, 'nvJitLinkCreate')
+        except Exception:  # symbol missing: pointer stays NULL and the wrapper reports it on first use
+            pass
+
+        global __nvJitLinkDestroy
+        try:
+            __nvJitLinkDestroy = <void*><intptr_t>win32api.GetProcAddress(handle, 'nvJitLinkDestroy')
+        except Exception:
+            pass
+
+        global __nvJitLinkAddData
+        try:
+            __nvJitLinkAddData = <void*><intptr_t>win32api.GetProcAddress(handle, 'nvJitLinkAddData')
+        except Exception:
+            pass
+
+        global __nvJitLinkAddFile
+        try:
+            __nvJitLinkAddFile = <void*><intptr_t>win32api.GetProcAddress(handle, 'nvJitLinkAddFile')
+        except Exception:
+            pass
+
+        global __nvJitLinkComplete
+        try:
+            __nvJitLinkComplete = <void*><intptr_t>win32api.GetProcAddress(handle, 'nvJitLinkComplete')
+        except Exception:
+            pass
+
+        global __nvJitLinkGetLinkedCubinSize
+        try:
+            __nvJitLinkGetLinkedCubinSize = <void*><intptr_t>win32api.GetProcAddress(handle, 'nvJitLinkGetLinkedCubinSize')
+        except Exception:
+            pass
+
+        global __nvJitLinkGetLinkedCubin
+        try:
+            __nvJitLinkGetLinkedCubin = <void*><intptr_t>win32api.GetProcAddress(handle, 'nvJitLinkGetLinkedCubin')
+        except Exception:
+            pass
+
+        global __nvJitLinkGetLinkedPtxSize
+        try:
+            __nvJitLinkGetLinkedPtxSize = <void*><intptr_t>win32api.GetProcAddress(handle, 'nvJitLinkGetLinkedPtxSize')
+        except Exception:
+            pass
+
+        global __nvJitLinkGetLinkedPtx
+        try:
+            __nvJitLinkGetLinkedPtx = <void*><intptr_t>win32api.GetProcAddress(handle, 'nvJitLinkGetLinkedPtx')
+        except Exception:
+            pass
+
+        global __nvJitLinkGetErrorLogSize
+        try:
+            __nvJitLinkGetErrorLogSize = <void*><intptr_t>win32api.GetProcAddress(handle, 'nvJitLinkGetErrorLogSize')
+        except Exception:
+            pass
+
+        global __nvJitLinkGetErrorLog
+        try:
+            __nvJitLinkGetErrorLog = <void*><intptr_t>win32api.GetProcAddress(handle, 'nvJitLinkGetErrorLog')
+        except Exception:
+            pass
+
+        global __nvJitLinkGetInfoLogSize
+        try:
+            __nvJitLinkGetInfoLogSize = <void*><intptr_t>win32api.GetProcAddress(handle, 'nvJitLinkGetInfoLogSize')
+        except Exception:
+            pass
+
+        global __nvJitLinkGetInfoLog
+        try:
+            __nvJitLinkGetInfoLog = <void*><intptr_t>win32api.GetProcAddress(handle, 'nvJitLinkGetInfoLog')
+        except Exception:
+            pass
+
+    __py_nvJitLink_init = True
+    return 0
+
+
+cdef dict func_ptrs = None  # cache for _inspect_function_pointers()
+
+
+cpdef dict _inspect_function_pointers():  # map "__nvJitLinkXxx" -> resolved address as an integer (0 when not found)
+    global func_ptrs
+    if func_ptrs is not None:
+        return func_ptrs
+
+    _check_or_init_nvJitLink()
+    cdef dict data = {}
+
+    global __nvJitLinkCreate
+    data["__nvJitLinkCreate"] = <intptr_t>__nvJitLinkCreate  # void* cannot be stored in a dict; expose it as an integer
+
+    global __nvJitLinkDestroy
+    data["__nvJitLinkDestroy"] = <intptr_t>__nvJitLinkDestroy
+
+    global __nvJitLinkAddData
+    data["__nvJitLinkAddData"] = <intptr_t>__nvJitLinkAddData
+
+    global __nvJitLinkAddFile
+    data["__nvJitLinkAddFile"] = <intptr_t>__nvJitLinkAddFile
+
+    global __nvJitLinkComplete
+    data["__nvJitLinkComplete"] = <intptr_t>__nvJitLinkComplete
+
+    global __nvJitLinkGetLinkedCubinSize
+    data["__nvJitLinkGetLinkedCubinSize"] = <intptr_t>__nvJitLinkGetLinkedCubinSize
+
+    global __nvJitLinkGetLinkedCubin
+    data["__nvJitLinkGetLinkedCubin"] = <intptr_t>__nvJitLinkGetLinkedCubin
+
+    global __nvJitLinkGetLinkedPtxSize
+    data["__nvJitLinkGetLinkedPtxSize"] = <intptr_t>__nvJitLinkGetLinkedPtxSize
+
+    global __nvJitLinkGetLinkedPtx
+    data["__nvJitLinkGetLinkedPtx"] = <intptr_t>__nvJitLinkGetLinkedPtx
+
+    global __nvJitLinkGetErrorLogSize
+    data["__nvJitLinkGetErrorLogSize"] = <intptr_t>__nvJitLinkGetErrorLogSize
+
+    global __nvJitLinkGetErrorLog
+    data["__nvJitLinkGetErrorLog"] = <intptr_t>__nvJitLinkGetErrorLog
+
+    global __nvJitLinkGetInfoLogSize
+    data["__nvJitLinkGetInfoLogSize"] = <intptr_t>__nvJitLinkGetInfoLogSize
+
+    global __nvJitLinkGetInfoLog
+    data["__nvJitLinkGetInfoLog"] = <intptr_t>__nvJitLinkGetInfoLog
+
+    func_ptrs = data
+    return data
+
+
+cpdef _inspect_function_pointer(str name):  # single-entry lookup; raises KeyError for an unknown name
+    global func_ptrs
+    if func_ptrs is None:
+        func_ptrs = _inspect_function_pointers()
+    return func_ptrs[name]
+
+
+###############################################################################
+# Wrapper functions
+###############################################################################
+
+cdef nvJitLinkResult _nvJitLinkCreate(nvJitLinkHandle* handle, uint32_t numOptions, const char** options) except* nogil:
+    global __nvJitLinkCreate
+    _check_or_init_nvJitLink()  # resolves all symbols on first use; cheap flag check afterwards
+    if __nvJitLinkCreate == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkCreate is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle*, uint32_t, const char**) nogil>__nvJitLinkCreate)(
+        handle, numOptions, options)
+
+
+cdef nvJitLinkResult _nvJitLinkDestroy(nvJitLinkHandle* handle) except* nogil:
+    global __nvJitLinkDestroy
+    _check_or_init_nvJitLink()
+    if __nvJitLinkDestroy == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkDestroy is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle*) nogil>__nvJitLinkDestroy)(
+        handle)
+
+
+cdef nvJitLinkResult _nvJitLinkAddData(nvJitLinkHandle handle, nvJitLinkInputType inputType, const void* data, size_t size, const char* name) except* nogil:
+    global __nvJitLinkAddData
+    _check_or_init_nvJitLink()
+    if __nvJitLinkAddData == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkAddData is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle, nvJitLinkInputType, const void*, size_t, const char*) nogil>__nvJitLinkAddData)(
+        handle, inputType, data, size, name)
+
+
+cdef nvJitLinkResult _nvJitLinkAddFile(nvJitLinkHandle handle, nvJitLinkInputType inputType, const char* fileName) except* nogil:
+    global __nvJitLinkAddFile
+    _check_or_init_nvJitLink()
+    if __nvJitLinkAddFile == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkAddFile is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle, nvJitLinkInputType, const char*) nogil>__nvJitLinkAddFile)(
+        handle, inputType, fileName)
+
+
+cdef nvJitLinkResult _nvJitLinkComplete(nvJitLinkHandle handle) except* nogil:
+    global __nvJitLinkComplete
+    _check_or_init_nvJitLink()
+    if __nvJitLinkComplete == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkComplete is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle) nogil>__nvJitLinkComplete)(
+        handle)
+
+
+cdef nvJitLinkResult _nvJitLinkGetLinkedCubinSize(nvJitLinkHandle handle, size_t* size) except* nogil:
+    global __nvJitLinkGetLinkedCubinSize
+    _check_or_init_nvJitLink()
+    if __nvJitLinkGetLinkedCubinSize == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkGetLinkedCubinSize is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle, size_t*) nogil>__nvJitLinkGetLinkedCubinSize)(
+        handle, size)
+
+
+cdef nvJitLinkResult _nvJitLinkGetLinkedCubin(nvJitLinkHandle handle, void* cubin) except* nogil:
+    global __nvJitLinkGetLinkedCubin
+    _check_or_init_nvJitLink()
+    if __nvJitLinkGetLinkedCubin == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkGetLinkedCubin is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle, void*) nogil>__nvJitLinkGetLinkedCubin)(
+        handle, cubin)
+
+
+cdef nvJitLinkResult _nvJitLinkGetLinkedPtxSize(nvJitLinkHandle handle, size_t* size) except* nogil:
+    global __nvJitLinkGetLinkedPtxSize
+    _check_or_init_nvJitLink()
+    if __nvJitLinkGetLinkedPtxSize == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkGetLinkedPtxSize is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle, size_t*) nogil>__nvJitLinkGetLinkedPtxSize)(
+        handle, size)
+
+
+cdef nvJitLinkResult _nvJitLinkGetLinkedPtx(nvJitLinkHandle handle, char* ptx) except* nogil:
+    global __nvJitLinkGetLinkedPtx
+    _check_or_init_nvJitLink()
+    if __nvJitLinkGetLinkedPtx == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkGetLinkedPtx is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle, char*) nogil>__nvJitLinkGetLinkedPtx)(
+        handle, ptx)
+
+
+cdef nvJitLinkResult _nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t* size) except* nogil:
+    global __nvJitLinkGetErrorLogSize
+    _check_or_init_nvJitLink()
+    if __nvJitLinkGetErrorLogSize == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkGetErrorLogSize is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle, size_t*) nogil>__nvJitLinkGetErrorLogSize)(
+        handle, size)
+
+
+cdef nvJitLinkResult _nvJitLinkGetErrorLog(nvJitLinkHandle handle, char* log) except* nogil:
+    global __nvJitLinkGetErrorLog
+    _check_or_init_nvJitLink()
+    if __nvJitLinkGetErrorLog == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkGetErrorLog is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle, char*) nogil>__nvJitLinkGetErrorLog)(
+        handle, log)
+
+
+cdef nvJitLinkResult _nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t* size) except* nogil:
+    global __nvJitLinkGetInfoLogSize
+    _check_or_init_nvJitLink()
+    if __nvJitLinkGetInfoLogSize == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkGetInfoLogSize is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle, size_t*) nogil>__nvJitLinkGetInfoLogSize)(
+        handle, size)
+
+
+cdef nvJitLinkResult _nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) except* nogil:
+    global __nvJitLinkGetInfoLog
+    _check_or_init_nvJitLink()
+    if __nvJitLinkGetInfoLog == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkGetInfoLog is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle, char*) nogil>__nvJitLinkGetInfoLog)(
+        handle, log)
diff --git a/cuda/cuda/bindings/cynvJitLink.pxd b/cuda/cuda/bindings/cynvJitLink.pxd
new file mode 100644
index 000000000..ed440c0b3
--- /dev/null
+++ b/cuda/cuda/bindings/cynvJitLink.pxd
@@ -0,0 +1,48 @@
+# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# This code was automatically generated across versions from 12.0.1 to 12.4.1. Do not modify it directly.
+
+
+from libc.stdint cimport int64_t, uint32_t
+
+
+###############################################################################
+# Types (structs, enums, ...)
+###############################################################################
+
+# enums
+
+
+
+# types
+cdef extern from *:
+    """
+    #include <driver_types.h>
+    #include <library_types.h>
+    #include <cuComplex.h>
+    """
+    ctypedef void* cudaStream_t 'cudaStream_t'
+
+
+
+
+
+###############################################################################
+# Functions
+###############################################################################
+
+cdef nvJitLinkResult nvJitLinkCreate(nvJitLinkHandle* handle, uint32_t numOptions, const char** options) except* nogil
+cdef nvJitLinkResult nvJitLinkDestroy(nvJitLinkHandle* handle) except* nogil
+cdef nvJitLinkResult nvJitLinkAddData(nvJitLinkHandle handle, nvJitLinkInputType inputType, const void* data, size_t size, const char* name) except* nogil
+cdef nvJitLinkResult nvJitLinkAddFile(nvJitLinkHandle handle, nvJitLinkInputType inputType, const char* fileName) except* nogil
+cdef nvJitLinkResult nvJitLinkComplete(nvJitLinkHandle handle) except* nogil
+cdef nvJitLinkResult nvJitLinkGetLinkedCubinSize(nvJitLinkHandle handle, size_t* size) except* nogil
+cdef nvJitLinkResult nvJitLinkGetLinkedCubin(nvJitLinkHandle handle, void* cubin) except* nogil
+cdef nvJitLinkResult nvJitLinkGetLinkedPtxSize(nvJitLinkHandle handle, size_t* size) except* nogil
+cdef nvJitLinkResult nvJitLinkGetLinkedPtx(nvJitLinkHandle handle, char* ptx) except* nogil
+cdef nvJitLinkResult nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t* size) except* nogil
+cdef nvJitLinkResult nvJitLinkGetErrorLog(nvJitLinkHandle handle, char* log) except* nogil
+cdef nvJitLinkResult nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t* size) except* nogil
+cdef nvJitLinkResult nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) except* nogil
diff --git a/cuda/cuda/bindings/cynvJitLink.pyx b/cuda/cuda/bindings/cynvJitLink.pyx
new file mode 100644
index 000000000..65d3f9840
--- /dev/null
+++ b/cuda/cuda/bindings/cynvJitLink.pyx
@@ -0,0 +1,63 @@
+# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# This code was automatically generated across versions from 12.0.1 to 12.4.1. Do not modify it directly.
+
+from ._bindings cimport nvJitLink as _nvJitLink
+
+
+###############################################################################
+# Wrapper functions (thin pass-throughs to the lazily loaded _bindings.nvJitLink entry points)
+###############################################################################
+
+cdef nvJitLinkResult nvJitLinkCreate(nvJitLinkHandle* handle, uint32_t numOptions, const char** options) except* nogil:
+    return _nvJitLink._nvJitLinkCreate(handle, numOptions, options)
+
+
+cdef nvJitLinkResult nvJitLinkDestroy(nvJitLinkHandle* handle) except* nogil:
+    return _nvJitLink._nvJitLinkDestroy(handle)
+
+
+cdef nvJitLinkResult nvJitLinkAddData(nvJitLinkHandle handle, nvJitLinkInputType inputType, const void* data, size_t size, const char* name) except* nogil:
+    return _nvJitLink._nvJitLinkAddData(handle, inputType, data, size, name)
+
+
+cdef nvJitLinkResult nvJitLinkAddFile(nvJitLinkHandle handle, nvJitLinkInputType inputType, const char* fileName) except* nogil:
+    return _nvJitLink._nvJitLinkAddFile(handle, inputType, fileName)
+
+
+cdef nvJitLinkResult nvJitLinkComplete(nvJitLinkHandle handle) except* nogil:
+    return _nvJitLink._nvJitLinkComplete(handle)
+
+
+cdef nvJitLinkResult nvJitLinkGetLinkedCubinSize(nvJitLinkHandle handle, size_t* size) except* nogil:
+    return _nvJitLink._nvJitLinkGetLinkedCubinSize(handle, size)
+
+
+cdef nvJitLinkResult nvJitLinkGetLinkedCubin(nvJitLinkHandle handle, void* cubin) except* nogil:
+    return _nvJitLink._nvJitLinkGetLinkedCubin(handle, cubin)
+
+
+cdef nvJitLinkResult nvJitLinkGetLinkedPtxSize(nvJitLinkHandle handle, size_t* size) except* nogil:
+    return _nvJitLink._nvJitLinkGetLinkedPtxSize(handle, size)
+
+
+cdef nvJitLinkResult nvJitLinkGetLinkedPtx(nvJitLinkHandle handle, char* ptx) except* nogil:
+    return _nvJitLink._nvJitLinkGetLinkedPtx(handle, ptx)
+
+
+cdef nvJitLinkResult nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t* size) except* nogil:
+    return _nvJitLink._nvJitLinkGetErrorLogSize(handle, size)
+
+
+cdef nvJitLinkResult nvJitLinkGetErrorLog(nvJitLinkHandle handle, char* log) except* nogil:
+    return _nvJitLink._nvJitLinkGetErrorLog(handle, log)
+
+
+cdef nvJitLinkResult nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t* size) except* nogil:
+    return _nvJitLink._nvJitLinkGetInfoLogSize(handle, size)
+
+
+cdef nvJitLinkResult nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) except* nogil:
+    return _nvJitLink._nvJitLinkGetInfoLog(handle, log)
diff --git a/cuda/cuda/bindings/nvJitLink.pxd b/cuda/cuda/bindings/nvJitLink.pxd
new file mode 100644
index 000000000..d063002be
--- /dev/null
+++ b/cuda/cuda/bindings/nvJitLink.pxd
@@ -0,0 +1,46 @@
+# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# This code was automatically generated across versions from 12.0.1 to 12.4.1. Do not modify it directly.
+
+from libc.stdint cimport intptr_t
+
+from .cynvJitLink cimport *
+
+
+###############################################################################
+# Types
+###############################################################################
+
+
+
+# Short aliases for C types brought in through the star-cimport above.
+# NOTE(review): only `cudaStream_t` is declared in the visible part of
+# cynvJitLink.pxd; confirm `cudaDataType` and `libraryPropertyType_t` are
+# declared there as well, otherwise these ctypedefs will not compile.
+ctypedef cudaStream_t Stream
+ctypedef cudaDataType DataType
+ctypedef libraryPropertyType_t LibraryPropertyType
+
+
+###############################################################################
+# Enum
+###############################################################################
+
+
+
+
+###############################################################################
+# Functions
+###############################################################################
+
+# Python-visible wrappers implemented in nvJitLink.pyx. Pointer-like
+# arguments (handle storage, option arrays, data buffers, output buffers) are
+# declared as `intptr_t`; the implementations pass them on to the matching
+# nvJitLink* C function and check the returned status.
+# NOTE(review): several signatures take `nvJitLinkHandle` directly; whether a
+# Python caller can supply that type depends on how it is declared in
+# cynvJitLink.pxd (not visible here) -- confirm.
+cpdef create(intptr_t handle, uint32_t num_options, intptr_t options)
+cpdef destroy(intptr_t handle)
+cpdef add_data(nvJitLinkHandle handle, nvJitLinkInputType input_type, intptr_t data, size_t size, intptr_t name)
+cpdef add_file(nvJitLinkHandle handle, nvJitLinkInputType input_type, intptr_t file_name)
+cpdef complete(nvJitLinkHandle handle)
+cpdef get_linked_cubin_size(nvJitLinkHandle handle, intptr_t size)
+cpdef get_linked_cubin(nvJitLinkHandle handle, intptr_t cubin)
+cpdef get_linked_ptx_size(nvJitLinkHandle handle, intptr_t size)
+cpdef get_linked_ptx(nvJitLinkHandle handle, intptr_t ptx)
+cpdef get_error_log_size(nvJitLinkHandle handle, intptr_t size)
+cpdef get_error_log(nvJitLinkHandle handle, intptr_t log)
+cpdef get_info_log_size(nvJitLinkHandle handle, intptr_t size)
+cpdef get_info_log(nvJitLinkHandle handle, intptr_t log)
diff --git a/cuda/cuda/bindings/nvJitLink.pyx b/cuda/cuda/bindings/nvJitLink.pyx
new file mode 100644
index 000000000..18f4c7545
--- /dev/null
+++ b/cuda/cuda/bindings/nvJitLink.pyx
@@ -0,0 +1,138 @@
+# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# This code was automatically generated across versions from 12.0.1 to 12.4.1.
Do not modify it directly.
+
+cimport cython  # NOQA
+
+from enum import IntEnum as _IntEnum
+
+
+###############################################################################
+# Enum
+###############################################################################
+
+
+
+
+###############################################################################
+# Error handling
+###############################################################################
+
+# Maps each nvJitLinkResult value to its symbolic name; used to build the
+# message carried by nvJitLinkError.
+cdef dict STATUS={
+    NVJITLINK_SUCCESS                   : 'NVJITLINK_SUCCESS',
+    NVJITLINK_ERROR_UNRECOGNIZED_OPTION : 'NVJITLINK_ERROR_UNRECOGNIZED_OPTION',
+    NVJITLINK_ERROR_MISSING_ARCH        : 'NVJITLINK_ERROR_MISSING_ARCH',  # -arch=sm_NN option not specified
+    NVJITLINK_ERROR_INVALID_INPUT       : 'NVJITLINK_ERROR_INVALID_INPUT',
+    NVJITLINK_ERROR_PTX_COMPILE         : 'NVJITLINK_ERROR_PTX_COMPILE',
+    NVJITLINK_ERROR_NVVM_COMPILE        : 'NVJITLINK_ERROR_NVVM_COMPILE',
+    NVJITLINK_ERROR_INTERNAL            : 'NVJITLINK_ERROR_INTERNAL',
+    NVJITLINK_ERROR_THREADPOOL          : 'NVJITLINK_ERROR_THREADPOOL',
+    NVJITLINK_ERROR_UNRECOGNIZED_INPUT  : 'NVJITLINK_ERROR_UNRECOGNIZED_INPUT',
+    NVJITLINK_ERROR_NULL_INPUT          : 'NVJITLINK_ERROR_NULL_INPUT',
+    NVJITLINK_ERROR_INCOMPATIBLE_OPTIONS: 'NVJITLINK_ERROR_INCOMPATIBLE_OPTIONS',
+    NVJITLINK_ERROR_INCORRECT_INPUT_TYPE: 'NVJITLINK_ERROR_INCORRECT_INPUT_TYPE',
+    NVJITLINK_ERROR_ARCH_MISMATCH       : 'NVJITLINK_ERROR_ARCH_MISMATCH',
+    NVJITLINK_ERROR_OUTDATED_LIBRARY    : 'NVJITLINK_ERROR_OUTDATED_LIBRARY',
+    NVJITLINK_ERROR_MISSING_FATBIN      : 'NVJITLINK_ERROR_MISSING_FATBIN'
+}
+
+class nvJitLinkError(Exception):
+    """Raised when an nvJitLink call returns a non-zero status.
+
+    The numeric status is kept on ``self.status``; the exception message is
+    the symbolic name looked up in ``STATUS``.
+    """
+
+    def __init__(self, status):
+        self.status = status
+        # A status missing from the table (e.g. one added by a newer library
+        # release) must not turn into a KeyError that hides the real failure.
+        cdef str err = STATUS.get(status, f'Unknown nvJitLinkResult ({status})')
+        super(nvJitLinkError, self).__init__(err)
+
+    def __reduce__(self):
+        # Rebuild from the status alone so the exception survives pickling.
+        return (type(self), (self.status,))
+
+
+# Raise nvJitLinkError for any non-zero status. Declared nogil so it can be
+# called from GIL-free code; the GIL is taken only on the error path.
+@cython.profile(False)
+cdef inline void check_status(int status) nogil:
+    if status != 0:
+        with gil:
+            raise nvJitLinkError(status)
+
+
+###############################################################################
+# Wrapper functions
+###############################################################################
+
+# Each wrapper below releases the GIL for the C call only, then turns a
+# non-zero nvJitLinkResult into nvJitLinkError through check_status().
+# Arguments declared as `intptr_t` are raw addresses supplied by the caller
+# and are cast to the pointer type the C entry point declares.
+
+cpdef create(intptr_t handle, uint32_t num_options, intptr_t options):
+    with nogil:
+        status = nvJitLinkCreate(<nvJitLinkHandle*>handle, num_options, <const char**>options)
+    check_status(status)
+
+
+cpdef destroy(intptr_t handle):
+    with nogil:
+        status = nvJitLinkDestroy(<nvJitLinkHandle*>handle)
+    check_status(status)
+
+
+cpdef add_data(nvJitLinkHandle handle, nvJitLinkInputType input_type, intptr_t data, size_t size, intptr_t name):
+    with nogil:
+        status = nvJitLinkAddData(handle, input_type, <const void*>data, size, <const char*>name)
+    check_status(status)
+
+
+cpdef add_file(nvJitLinkHandle handle, nvJitLinkInputType input_type, intptr_t file_name):
+    with nogil:
+        status = nvJitLinkAddFile(handle, input_type, <const char*>file_name)
+    check_status(status)
+
+
+cpdef complete(nvJitLinkHandle handle):
+    with nogil:
+        status = nvJitLinkComplete(handle)
+    check_status(status)
+
+
+cpdef get_linked_cubin_size(nvJitLinkHandle handle, intptr_t size):
+    with nogil:
+        status = nvJitLinkGetLinkedCubinSize(handle, <size_t*>size)
+    check_status(status)
+
+
+cpdef get_linked_cubin(nvJitLinkHandle handle, intptr_t cubin):
+    with nogil:
+        status = nvJitLinkGetLinkedCubin(handle, <void*>cubin)
+    check_status(status)
+
+
+cpdef get_linked_ptx_size(nvJitLinkHandle handle, intptr_t size):
+    with nogil:
+        status = nvJitLinkGetLinkedPtxSize(handle, <size_t*>size)
+    check_status(status)
+
+
+cpdef get_linked_ptx(nvJitLinkHandle handle, intptr_t ptx):
+    with nogil:
+        status = nvJitLinkGetLinkedPtx(handle, <char*>ptx)
+    check_status(status)
+
+
+cpdef get_error_log_size(nvJitLinkHandle handle, intptr_t size):
+    with nogil:
+        status = nvJitLinkGetErrorLogSize(handle, <size_t*>size)
+    check_status(status)
+
+
+cpdef get_error_log(nvJitLinkHandle handle, intptr_t log):
+    with nogil:
+        status = nvJitLinkGetErrorLog(handle, <char*>log)
+    check_status(status)
+
+
+cpdef get_info_log_size(nvJitLinkHandle handle, intptr_t size):
+    with nogil:
+        status = nvJitLinkGetInfoLogSize(handle, <size_t*>size)
+    check_status(status)
+
+
+cpdef get_info_log(nvJitLinkHandle handle, intptr_t log):
+    with nogil:
+        status = nvJitLinkGetInfoLog(handle, <char*>log)
+    check_status(status)
diff --git a/cuda/cuda/bindings/tests/test_nvJitLink.py b/cuda/cuda/bindings/tests/test_nvJitLink.py
new file mode 100644
index 000000000..7ced5ff38
--- /dev/null
+++ b/cuda/cuda/bindings/tests/test_nvJitLink.py
@@ -0,0 +1,3 @@
+import pytest
+from cuda import nvJitLink
+
diff --git a/cuda/setup.py b/cuda/setup.py
index ec5236261..8987151a8 100644
--- a/cuda/setup.py
+++ b/cuda/setup.py
@@ -57,7 +57,8 @@
                         'cuda_egl_interop.h',
                         'cuda_gl_interop.h',
                         'cuda_vdpau_interop.h'],
-                  'nvrtc' : ['nvrtc.h']}
+                  'nvrtc' : ['nvrtc.h'],
+                  'nvJitLink' : ['nvJitLink.h'],}
 
 replace = {' __device_builtin__ ':' ',
            'CUDARTAPI ':' ',
From 47db0c7b35f43ee2a60a2a8b7307afdadecf9f51 Mon Sep 17 00:00:00 2001
From: ksimpson
Date: Tue, 15 Oct 2024 10:11:07 -0700
Subject: [PATCH 02/34] add test file

---
 cuda/cuda/bindings/tests/test_nvJitLink.py | 161 +++++++++++++++++++++
 1 file changed, 161 insertions(+)

diff --git a/cuda/cuda/bindings/tests/test_nvJitLink.py b/cuda/cuda/bindings/tests/test_nvJitLink.py
index 7ced5ff38..f566ae7c6 100644
--- a/cuda/cuda/bindings/tests/test_nvJitLink.py
+++ b/cuda/cuda/bindings/tests/test_nvJitLink.py
@@ -1,3 +1,164 @@
 import pytest
 from cuda import nvJitLink
 
+# NOTE(review): these tests call create()/add_data()/get_*() with string
+# options and expect returned handles and RuntimeError messages ending in
+# " error". The cpdef wrappers in nvJitLink.pyx take raw addresses, return
+# nothing, and raise nvJitLinkError(Exception) -- confirm which API the tests
+# are meant to target.
+def test_create_no_arch_error():
+    # nvjitlink expects at least the architecture to be specified.
+    with pytest.raises(RuntimeError, match="NVJITLINK_ERROR_MISSING_ARCH error"):
+        nvJitLink.create()
+
+
+def test_invalid_arch_error():
+    # sm_XX is not a valid architecture
+    with pytest.raises(RuntimeError, match="NVJITLINK_ERROR_UNRECOGNIZED_OPTION error"):
+        nvJitLink.create("-arch=sm_XX")
+
+
+def test_unrecognized_option_error():
+    with pytest.raises(RuntimeError, match="NVJITLINK_ERROR_UNRECOGNIZED_OPTION error"):
+        nvJitLink.create("-fictitious_option")
+
+
+def test_invalid_option_type_error():
+    with pytest.raises(TypeError, match="Expecting only strings"):
+        nvJitLink.create("-arch", 53)
+
+
+def test_create_and_destroy():
+    handle = nvJitLink.create("-arch=sm_53")
+    assert handle != 0
+    nvJitLink.destroy(handle)
+
+
+def test_complete_empty():
+    handle = nvJitLink.create("-arch=sm_75")
+    nvJitLink.complete(handle)
+    nvJitLink.destroy(handle)
+
+
+# `InputType` is never imported on its own, so every case goes through the
+# module attribute, as the first case already did; a bare `InputType` here
+# raises NameError while the module is being imported.
+@pytest.mark.parametrize(
+    "input_file,input_type",
+    [
+        ("device_functions_cubin", nvJitLink.InputType.CUBIN),
+        ("device_functions_fatbin", nvJitLink.InputType.FATBIN),
+        ("device_functions_ptx", nvJitLink.InputType.PTX),
+        ("device_functions_object", nvJitLink.InputType.OBJECT),
+        ("device_functions_archive", nvJitLink.InputType.LIBRARY),
+    ],
+)
+def test_add_file(input_file, input_type, gpu_arch_flag, request):
+    filename, data = request.getfixturevalue(input_file)
+
+    handle = nvJitLink.create(gpu_arch_flag)
+    nvJitLink.add_data(handle, input_type.value, data, filename)
+    nvJitLink.destroy(handle)
+
+
+# We test the LTO input case separately as it requires the `-lto` flag. The
+# OBJECT input type is used because the LTO-IR container is packaged in an ELF
+# object when produced by NVCC.
+def test_add_file_lto(device_functions_ltoir_object, gpu_arch_flag): + filename, data = device_functions_ltoir_object + + handle = nvJitLink.create(gpu_arch_flag, "-lto") + nvJitLink.add_data(handle, InputType.OBJECT.value, data, filename) + nvJitLink.destroy(handle) + + +def test_get_error_log(undefined_extern_cubin, gpu_arch_flag): + handle = nvJitLink.create(gpu_arch_flag) + filename, data = undefined_extern_cubin + input_type = InputType.CUBIN.value + nvJitLink.add_data(handle, input_type, data, filename) + with pytest.raises(RuntimeError): + nvJitLink.complete(handle) + error_log = nvJitLink.get_error_log(handle) + nvJitLink.destroy(handle) + assert ( + "Undefined reference to '_Z5undefff' " + "in 'undefined_extern.cubin'" in error_log + ) + + +def test_get_info_log(device_functions_cubin, gpu_arch_flag): + handle = nvJitLink.create(gpu_arch_flag) + filename, data = device_functions_cubin + input_type = InputType.CUBIN.value + nvJitLink.add_data(handle, input_type, data, filename) + nvJitLink.complete(handle) + info_log = nvJitLink.get_info_log(handle) + nvJitLink.destroy(handle) + # Info log is empty + assert "" == info_log + + +def test_get_linked_cubin(device_functions_cubin, gpu_arch_flag): + handle = nvJitLink.create(gpu_arch_flag) + filename, data = device_functions_cubin + input_type = InputType.CUBIN.value + nvJitLink.add_data(handle, input_type, data, filename) + nvJitLink.complete(handle) + cubin = nvJitLink.get_linked_cubin(handle) + nvJitLink.destroy(handle) + + # Just check we got something that looks like an ELF + assert cubin[:4] == b"\x7fELF" + + +def test_get_linked_cubin_link_not_complete_error( + device_functions_cubin, gpu_arch_flag +): + handle = nvJitLink.create(gpu_arch_flag) + filename, data = device_functions_cubin + input_type = InputType.CUBIN.value + nvJitLink.add_data(handle, input_type, data, filename) + with pytest.raises(RuntimeError, match="NVJITLINK_ERROR_INTERNAL error"): + nvJitLink.get_linked_cubin(handle) + 
nvJitLink.destroy(handle) + + +def test_get_linked_cubin_from_lto(device_functions_ltoir_object, gpu_arch_flag): + filename, data = device_functions_ltoir_object + # device_functions_ltoir_object is a host object containing a fatbin + # containing an LTOIR container, because that is what NVCC produces when + # LTO is requested. So we need to use the OBJECT input type, and the linker + # retrieves the LTO IR from it because we passed the -lto flag. + input_type = InputType.OBJECT.value + handle = nvJitLink.create(gpu_arch_flag, "-lto") + nvJitLink.add_data(handle, input_type, data, filename) + nvJitLink.complete(handle) + cubin = nvJitLink.get_linked_cubin(handle) + nvJitLink.destroy(handle) + + # Just check we got something that looks like an ELF + assert cubin[:4] == b"\x7fELF" + + +def test_get_linked_ptx_from_lto(device_functions_ltoir_object, gpu_arch_flag): + filename, data = device_functions_ltoir_object + # device_functions_ltoir_object is a host object containing a fatbin + # containing an LTOIR container, because that is what NVCC produces when + # LTO is requested. So we need to use the OBJECT input type, and the linker + # retrieves the LTO IR from it because we passed the -lto flag. 
+ input_type = InputType.OBJECT.value + handle = nvJitLink.create(gpu_arch_flag, "-lto", "-ptx") + nvJitLink.add_data(handle, input_type, data, filename) + nvJitLink.complete(handle) + nvJitLink.get_linked_ptx(handle) + nvJitLink.destroy(handle) + + +def test_get_linked_ptx_link_not_complete_error( + device_functions_ltoir_object, gpu_arch_flag +): + handle = nvJitLink.create(gpu_arch_flag, "-lto", "-ptx") + filename, data = device_functions_ltoir_object + input_type = InputType.OBJECT.value + nvJitLink.add_data(handle, input_type, data, filename) + with pytest.raises(RuntimeError, match="NVJITLINK_ERROR_INTERNAL error"): + nvJitLink.get_linked_ptx(handle) + nvJitLink.destroy(handle) + + +def test_package_version(): + assert pynvjitlink.__version__ is not None + assert len(str(pynvjitlink.__version__)) > 0 \ No newline at end of file From 84efbb02efdcb73bc327a005748fcc799332b2aa Mon Sep 17 00:00:00 2001 From: ksimpson Date: Tue, 15 Oct 2024 12:47:51 -0700 Subject: [PATCH 03/34] rebase --- .../cuda/bindings/_bindings/nvJitLink.pxd | 26 ++ .../bindings/_bindings/nvJitLink_linux.pyx | 382 +++++++++++++++++ .../bindings/_bindings/nvJitLink_windows.pyx | 393 ++++++++++++++++++ cuda_bindings/cynvJitLink.pxd | 48 +++ cuda_bindings/cynvJitLink.pyx | 63 +++ cuda_bindings/nvJitLink.pxd | 46 ++ cuda_bindings/nvJitLink.pyx | 138 ++++++ cuda_bindings/setup.py | 3 +- cuda_bindings/tests/test_nvJitLink.py | 3 + 9 files changed, 1101 insertions(+), 1 deletion(-) create mode 100644 cuda_bindings/cuda/bindings/_bindings/nvJitLink.pxd create mode 100644 cuda_bindings/cuda/bindings/_bindings/nvJitLink_linux.pyx create mode 100644 cuda_bindings/cuda/bindings/_bindings/nvJitLink_windows.pyx create mode 100644 cuda_bindings/cynvJitLink.pxd create mode 100644 cuda_bindings/cynvJitLink.pyx create mode 100644 cuda_bindings/nvJitLink.pxd create mode 100644 cuda_bindings/nvJitLink.pyx create mode 100644 cuda_bindings/tests/test_nvJitLink.py diff --git 
a/cuda_bindings/cuda/bindings/_bindings/nvJitLink.pxd b/cuda_bindings/cuda/bindings/_bindings/nvJitLink.pxd
new file mode 100644
index 000000000..dca128a0e
--- /dev/null
+++ b/cuda_bindings/cuda/bindings/_bindings/nvJitLink.pxd
@@ -0,0 +1,26 @@
+# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# This code was automatically generated across versions from 12.0.1 to 12.4.1. Do not modify it directly.
+
+from ..cynvJitLink cimport *
+
+
+###############################################################################
+# Wrapper functions
+###############################################################################
+
+# Declarations of the lazily-bound wrappers implemented per platform in
+# nvJitLink_linux.pyx / nvJitLink_windows.pyx. Each mirrors the signature of
+# the nvJitLink C function of the same name without the leading underscore.
+cdef nvJitLinkResult _nvJitLinkCreate(nvJitLinkHandle* handle, uint32_t numOptions, const char** options) except* nogil
+cdef nvJitLinkResult _nvJitLinkDestroy(nvJitLinkHandle* handle) except* nogil
+cdef nvJitLinkResult _nvJitLinkAddData(nvJitLinkHandle handle, nvJitLinkInputType inputType, const void* data, size_t size, const char* name) except* nogil
+cdef nvJitLinkResult _nvJitLinkAddFile(nvJitLinkHandle handle, nvJitLinkInputType inputType, const char* fileName) except* nogil
+cdef nvJitLinkResult _nvJitLinkComplete(nvJitLinkHandle handle) except* nogil
+cdef nvJitLinkResult _nvJitLinkGetLinkedCubinSize(nvJitLinkHandle handle, size_t* size) except* nogil
+cdef nvJitLinkResult _nvJitLinkGetLinkedCubin(nvJitLinkHandle handle, void* cubin) except* nogil
+cdef nvJitLinkResult _nvJitLinkGetLinkedPtxSize(nvJitLinkHandle handle, size_t* size) except* nogil
+cdef nvJitLinkResult _nvJitLinkGetLinkedPtx(nvJitLinkHandle handle, char* ptx) except* nogil
+cdef nvJitLinkResult _nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t* size) except* nogil
+cdef nvJitLinkResult _nvJitLinkGetErrorLog(nvJitLinkHandle handle, char* log) except* nogil
+cdef nvJitLinkResult _nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t* size) except* nogil
+cdef nvJitLinkResult _nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) except* nogil
diff --git a/cuda_bindings/cuda/bindings/_bindings/nvJitLink_linux.pyx b/cuda_bindings/cuda/bindings/_bindings/nvJitLink_linux.pyx
new file mode 100644
index 000000000..2fc6ca625
--- /dev/null
+++ b/cuda_bindings/cuda/bindings/_bindings/nvJitLink_linux.pyx
@@ -0,0 +1,382 @@
+# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# This code was automatically generated across versions from 12.0.1 to 12.4.1. Do not modify it directly.
+
+from libc.stdint cimport intptr_t
+
+from .utils cimport get_nvJitLink_dso_version_suffix
+
+from .utils import FunctionNotFoundError, NotSupportedError
+
+
+###############################################################################
+# Extern
+###############################################################################
+
+# Dynamic-loader primitives used to locate the driver and libnvJitLink at
+# run time.
+# NOTE(review): the header name in `cdef extern from ""` is empty in this copy
+# of the patch and looks truncated -- confirm against the generator output.
+cdef extern from "" nogil:
+    void* dlopen(const char*, int)
+    char* dlerror()
+    void* dlsym(void*, const char*)
+    int dlclose(void*)
+
+    enum:
+        RTLD_LAZY
+        RTLD_NOW
+        RTLD_GLOBAL
+        RTLD_LOCAL
+
+    const void* RTLD_DEFAULT 'RTLD_DEFAULT'
+
+
+###############################################################################
+# Wrapper init
+###############################################################################
+
+# Set once _check_or_init_nvJitLink() has resolved every symbol below.
+cdef bint __py_nvJitLink_init = False
+cdef void* __cuDriverGetVersion = NULL
+
+# One slot per nvJitLink entry point; NULL means "not resolved".
+cdef void* __nvJitLinkCreate = NULL
+cdef void* __nvJitLinkDestroy = NULL
+cdef void* __nvJitLinkAddData = NULL
+cdef void* __nvJitLinkAddFile = NULL
+cdef void* __nvJitLinkComplete = NULL
+cdef void* __nvJitLinkGetLinkedCubinSize = NULL
+cdef void* __nvJitLinkGetLinkedCubin = NULL
+cdef void* __nvJitLinkGetLinkedPtxSize = NULL
+cdef void* __nvJitLinkGetLinkedPtx = NULL
+cdef void* __nvJitLinkGetErrorLogSize = NULL
+cdef void* __nvJitLinkGetErrorLog = NULL
+cdef void* __nvJitLinkGetInfoLogSize = NULL
+cdef void* __nvJitLinkGetInfoLog = NULL
+
+
+# dlopen libnvJitLink, trying each version suffix that
+# get_nvJitLink_dso_version_suffix() yields for `driver_ver` (an empty suffix
+# means the unversioned "libnvJitLink.so"). Returns the first handle that
+# opens; raises RuntimeError with the dlerror() text if none does.
+cdef void* load_library(const int driver_ver) except* with gil:
+    cdef void* handle
+    for suffix in get_nvJitLink_dso_version_suffix(driver_ver):
+        so_name = "libnvJitLink.so" + (f".{suffix}" if suffix else suffix)
+        handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL)
+        if handle != NULL:
+            break
+    else:
+        # Loop finished without `break`: every candidate failed to open.
+        err_msg = dlerror()
+        raise RuntimeError(f'Failed to dlopen libnvJitLink ({err_msg.decode()})')
+    return handle
+
+
+# Resolve every nvJitLink symbol once per process. Returns 0 on success
+# (immediately if already initialised); raises NotSupportedError when
+# libcuda.so.1 cannot be opened and RuntimeError when the driver version
+# cannot be queried.
+# NOTE(review): the calls through `__cuDriverGetVersion` here, and through the
+# function-pointer slots in the wrappers further down, show no function-pointer
+# cast in this copy of the patch; a `void*` is not callable, so the cast was
+# presumably lost -- confirm against the generator output.
+cdef int _check_or_init_nvJitLink() except -1 nogil:
+    global __py_nvJitLink_init
+    if __py_nvJitLink_init:
+        return 0
+
+    # Load driver to check version
+    cdef void* handle = NULL
+    handle = dlopen('libcuda.so.1', RTLD_NOW | RTLD_GLOBAL)
+    if handle == NULL:
+        with gil:
+            err_msg = dlerror()
+            raise NotSupportedError(f'CUDA driver is not found ({err_msg.decode()})')
+    global __cuDriverGetVersion
+    if __cuDriverGetVersion == NULL:
+        __cuDriverGetVersion = dlsym(handle, "cuDriverGetVersion")
+    if __cuDriverGetVersion == NULL:
+        with gil:
+            raise RuntimeError('something went wrong')
+    cdef int err, driver_ver
+    err = (__cuDriverGetVersion)(&driver_ver)
+    if err != 0:
+        with gil:
+            raise RuntimeError('something went wrong')
+    #dlclose(handle)
+    # The driver handle is dropped without being closed; from here on `handle`
+    # is reused for libnvJitLink and filled in lazily by load_library().
+    handle = NULL
+
+    # Load function
+    # Each symbol is first looked up among already-loaded objects
+    # (RTLD_DEFAULT); only if that fails is libnvJitLink opened (at most once)
+    # and the lookup retried against it. A symbol that is still missing stays
+    # NULL and is reported by the corresponding wrapper when called.
+    global __nvJitLinkCreate
+    __nvJitLinkCreate = dlsym(RTLD_DEFAULT, 'nvJitLinkCreate')
+    if __nvJitLinkCreate == NULL:
+        if handle == NULL:
+            handle = load_library(driver_ver)
+        __nvJitLinkCreate = dlsym(handle, 'nvJitLinkCreate')
+
+    global __nvJitLinkDestroy
+    __nvJitLinkDestroy = dlsym(RTLD_DEFAULT, 'nvJitLinkDestroy')
+    if __nvJitLinkDestroy == NULL:
+        if handle == NULL:
+            handle = load_library(driver_ver)
+        __nvJitLinkDestroy = dlsym(handle, 'nvJitLinkDestroy')
+
+    global __nvJitLinkAddData
+    __nvJitLinkAddData = dlsym(RTLD_DEFAULT, 'nvJitLinkAddData')
+    if __nvJitLinkAddData == NULL:
+        if handle == NULL:
+            handle = load_library(driver_ver)
+        __nvJitLinkAddData = dlsym(handle, 'nvJitLinkAddData')
+
+    global __nvJitLinkAddFile
+    __nvJitLinkAddFile = dlsym(RTLD_DEFAULT, 'nvJitLinkAddFile')
+    if __nvJitLinkAddFile == NULL:
+        if handle == NULL:
+            handle = load_library(driver_ver)
+        __nvJitLinkAddFile = dlsym(handle, 'nvJitLinkAddFile')
+
+    global __nvJitLinkComplete
+    __nvJitLinkComplete = dlsym(RTLD_DEFAULT, 'nvJitLinkComplete')
+    if __nvJitLinkComplete == NULL:
+        if handle == NULL:
+            handle = load_library(driver_ver)
+        __nvJitLinkComplete = dlsym(handle, 'nvJitLinkComplete')
+
+    global __nvJitLinkGetLinkedCubinSize
+    __nvJitLinkGetLinkedCubinSize = dlsym(RTLD_DEFAULT, 'nvJitLinkGetLinkedCubinSize')
+    if __nvJitLinkGetLinkedCubinSize == NULL:
+        if handle == NULL:
+            handle = load_library(driver_ver)
+        __nvJitLinkGetLinkedCubinSize = dlsym(handle, 'nvJitLinkGetLinkedCubinSize')
+
+    global __nvJitLinkGetLinkedCubin
+    __nvJitLinkGetLinkedCubin = dlsym(RTLD_DEFAULT, 'nvJitLinkGetLinkedCubin')
+    if __nvJitLinkGetLinkedCubin == NULL:
+        if handle == NULL:
+            handle = load_library(driver_ver)
+        __nvJitLinkGetLinkedCubin = dlsym(handle, 'nvJitLinkGetLinkedCubin')
+
+    global __nvJitLinkGetLinkedPtxSize
+    __nvJitLinkGetLinkedPtxSize = dlsym(RTLD_DEFAULT, 'nvJitLinkGetLinkedPtxSize')
+    if __nvJitLinkGetLinkedPtxSize == NULL:
+        if handle == NULL:
+            handle = load_library(driver_ver)
+        __nvJitLinkGetLinkedPtxSize = dlsym(handle, 'nvJitLinkGetLinkedPtxSize')
+
+    global __nvJitLinkGetLinkedPtx
+    __nvJitLinkGetLinkedPtx = dlsym(RTLD_DEFAULT, 'nvJitLinkGetLinkedPtx')
+    if __nvJitLinkGetLinkedPtx == NULL:
+        if handle == NULL:
+            handle = load_library(driver_ver)
+        __nvJitLinkGetLinkedPtx = dlsym(handle, 'nvJitLinkGetLinkedPtx')
+
+    global __nvJitLinkGetErrorLogSize
+    __nvJitLinkGetErrorLogSize = dlsym(RTLD_DEFAULT, 'nvJitLinkGetErrorLogSize')
+    if __nvJitLinkGetErrorLogSize == NULL:
+        if handle == NULL:
+            handle = load_library(driver_ver)
+        __nvJitLinkGetErrorLogSize = dlsym(handle, 'nvJitLinkGetErrorLogSize')
+
+    global __nvJitLinkGetErrorLog
+    __nvJitLinkGetErrorLog = dlsym(RTLD_DEFAULT, 'nvJitLinkGetErrorLog')
+    if __nvJitLinkGetErrorLog == NULL:
+        if handle == NULL:
+            handle = load_library(driver_ver)
+        __nvJitLinkGetErrorLog = dlsym(handle, 'nvJitLinkGetErrorLog')
+
+    global __nvJitLinkGetInfoLogSize
+    __nvJitLinkGetInfoLogSize = dlsym(RTLD_DEFAULT, 'nvJitLinkGetInfoLogSize')
+    if __nvJitLinkGetInfoLogSize == NULL:
+        if handle == NULL:
+            handle = load_library(driver_ver)
+        __nvJitLinkGetInfoLogSize = dlsym(handle, 'nvJitLinkGetInfoLogSize')
+
+    global __nvJitLinkGetInfoLog
+    __nvJitLinkGetInfoLog = dlsym(RTLD_DEFAULT, 'nvJitLinkGetInfoLog')
+    if __nvJitLinkGetInfoLog == NULL:
+        if handle == NULL:
+            handle = load_library(driver_ver)
+        __nvJitLinkGetInfoLog = dlsym(handle, 'nvJitLinkGetInfoLog')
+
+    __py_nvJitLink_init = True
+    return 0
+
+
+# Cache for _inspect_function_pointers(); None until first requested.
+cdef dict func_ptrs = None
+
+
+# Return a dict keyed by slot name ("__nvJitLinkCreate", ...) holding the
+# resolved function pointers, initialising the bindings first if needed. The
+# dict is built once and cached in `func_ptrs`.
+# NOTE(review): the pointers are stored without a visible integer cast;
+# presumably a cast was lost from this copy of the patch -- confirm.
+cpdef dict _inspect_function_pointers():
+    global func_ptrs
+    if func_ptrs is not None:
+        return func_ptrs
+
+    _check_or_init_nvJitLink()
+    cdef dict data = {}
+
+    global __nvJitLinkCreate
+    data["__nvJitLinkCreate"] = __nvJitLinkCreate
+
+    global __nvJitLinkDestroy
+    data["__nvJitLinkDestroy"] = __nvJitLinkDestroy
+
+    global __nvJitLinkAddData
+    data["__nvJitLinkAddData"] = __nvJitLinkAddData
+
+    global __nvJitLinkAddFile
+    data["__nvJitLinkAddFile"] = __nvJitLinkAddFile
+
+    global __nvJitLinkComplete
+    data["__nvJitLinkComplete"] = __nvJitLinkComplete
+
+    global __nvJitLinkGetLinkedCubinSize
+    data["__nvJitLinkGetLinkedCubinSize"] = __nvJitLinkGetLinkedCubinSize
+
+    global __nvJitLinkGetLinkedCubin
+    data["__nvJitLinkGetLinkedCubin"] = __nvJitLinkGetLinkedCubin
+
+    global __nvJitLinkGetLinkedPtxSize
+    data["__nvJitLinkGetLinkedPtxSize"] = __nvJitLinkGetLinkedPtxSize
+
+    global __nvJitLinkGetLinkedPtx
+    data["__nvJitLinkGetLinkedPtx"] = __nvJitLinkGetLinkedPtx
+
+    global __nvJitLinkGetErrorLogSize
+    data["__nvJitLinkGetErrorLogSize"] = __nvJitLinkGetErrorLogSize
+
+    global __nvJitLinkGetErrorLog
+    data["__nvJitLinkGetErrorLog"] = __nvJitLinkGetErrorLog
+
+    global __nvJitLinkGetInfoLogSize
+    data["__nvJitLinkGetInfoLogSize"] = __nvJitLinkGetInfoLogSize
+
+    global __nvJitLinkGetInfoLog
+    data["__nvJitLinkGetInfoLog"] = __nvJitLinkGetInfoLog
+
+    func_ptrs = data
+    return data
+
+
+# Look up a single entry of the table above by slot name; raises KeyError for
+# a name that is not in the table.
+cpdef _inspect_function_pointer(str name):
+    global func_ptrs
+    if func_ptrs is None:
+        func_ptrs = _inspect_function_pointers()
+    return func_ptrs[name]
+
+
+###############################################################################
+# Wrapper functions
+###############################################################################
+
+# Every wrapper follows the same pattern: make sure the bindings are
+# initialised, raise FunctionNotFoundError if this symbol could not be
+# resolved, otherwise call through the stored pointer and return its result.
+
+cdef nvJitLinkResult _nvJitLinkCreate(nvJitLinkHandle* handle, uint32_t numOptions, const char** options) except* nogil:
+    global __nvJitLinkCreate
+    _check_or_init_nvJitLink()
+    if __nvJitLinkCreate == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkCreate is not found")
+    return (__nvJitLinkCreate)(
+        handle, numOptions, options)
+
+
+cdef nvJitLinkResult _nvJitLinkDestroy(nvJitLinkHandle* handle) except* nogil:
+    global __nvJitLinkDestroy
+    _check_or_init_nvJitLink()
+    if __nvJitLinkDestroy == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkDestroy is not found")
+    return (__nvJitLinkDestroy)(
+        handle)
+
+
+cdef nvJitLinkResult _nvJitLinkAddData(nvJitLinkHandle handle, nvJitLinkInputType inputType, const void* data, size_t size, const char* name) except* nogil:
+    global __nvJitLinkAddData
+    _check_or_init_nvJitLink()
+    if __nvJitLinkAddData == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkAddData is not found")
+    return (__nvJitLinkAddData)(
+        handle, inputType, data, size, name)
+
+
+cdef nvJitLinkResult _nvJitLinkAddFile(nvJitLinkHandle handle, nvJitLinkInputType inputType, const char* fileName) except* nogil:
+    global __nvJitLinkAddFile
+    _check_or_init_nvJitLink()
+    if __nvJitLinkAddFile == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkAddFile is not found")
+    return (__nvJitLinkAddFile)(
+        handle, inputType, fileName)
+
+
+cdef nvJitLinkResult _nvJitLinkComplete(nvJitLinkHandle handle) except* nogil:
+    global __nvJitLinkComplete
+    _check_or_init_nvJitLink()
+    if __nvJitLinkComplete == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkComplete is not found")
+    return (__nvJitLinkComplete)(
+        handle)
+
+
+cdef nvJitLinkResult _nvJitLinkGetLinkedCubinSize(nvJitLinkHandle handle, size_t* size) except* nogil:
+    global __nvJitLinkGetLinkedCubinSize
+    _check_or_init_nvJitLink()
+    if __nvJitLinkGetLinkedCubinSize == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkGetLinkedCubinSize is not found")
+    return (__nvJitLinkGetLinkedCubinSize)(
+        handle, size)
+
+
+cdef nvJitLinkResult _nvJitLinkGetLinkedCubin(nvJitLinkHandle handle, void* cubin) except* nogil:
+    global __nvJitLinkGetLinkedCubin
+    _check_or_init_nvJitLink()
+    if __nvJitLinkGetLinkedCubin == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkGetLinkedCubin is not found")
+    return (__nvJitLinkGetLinkedCubin)(
+        handle, cubin)
+
+
+cdef nvJitLinkResult _nvJitLinkGetLinkedPtxSize(nvJitLinkHandle handle, size_t* size) except* nogil:
+    global __nvJitLinkGetLinkedPtxSize
+    _check_or_init_nvJitLink()
+    if __nvJitLinkGetLinkedPtxSize == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkGetLinkedPtxSize is not found")
+    return (__nvJitLinkGetLinkedPtxSize)(
+        handle, size)
+
+
+cdef nvJitLinkResult _nvJitLinkGetLinkedPtx(nvJitLinkHandle handle, char* ptx) except* nogil:
+    global __nvJitLinkGetLinkedPtx
+    _check_or_init_nvJitLink()
+    if __nvJitLinkGetLinkedPtx == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkGetLinkedPtx is not found")
+    return (__nvJitLinkGetLinkedPtx)(
+        handle, ptx)
+
+
+cdef nvJitLinkResult _nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t* size) except* nogil:
+    global __nvJitLinkGetErrorLogSize
+    _check_or_init_nvJitLink()
+    if __nvJitLinkGetErrorLogSize == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkGetErrorLogSize is not found")
+    return (__nvJitLinkGetErrorLogSize)(
+        handle, size)
+
+
+cdef nvJitLinkResult _nvJitLinkGetErrorLog(nvJitLinkHandle handle, char* log) except* nogil:
+    global __nvJitLinkGetErrorLog
+    _check_or_init_nvJitLink()
+    if __nvJitLinkGetErrorLog == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkGetErrorLog is not found")
+    return (__nvJitLinkGetErrorLog)(
+        handle, log)
+
+
+cdef nvJitLinkResult _nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t* size) except* nogil:
+    global __nvJitLinkGetInfoLogSize
+    _check_or_init_nvJitLink()
+    if __nvJitLinkGetInfoLogSize == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkGetInfoLogSize is not found")
+    return (__nvJitLinkGetInfoLogSize)(
+        handle, size)
+
+
+cdef nvJitLinkResult _nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) except* nogil:
+    global __nvJitLinkGetInfoLog
+    _check_or_init_nvJitLink()
+    if __nvJitLinkGetInfoLog == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkGetInfoLog is not found")
+    return (__nvJitLinkGetInfoLog)(
+        handle, log)
diff --git a/cuda_bindings/cuda/bindings/_bindings/nvJitLink_windows.pyx b/cuda_bindings/cuda/bindings/_bindings/nvJitLink_windows.pyx
new file mode 100644
index 000000000..8856b59ca
--- /dev/null
+++ b/cuda_bindings/cuda/bindings/_bindings/nvJitLink_windows.pyx
@@ -0,0 +1,393 @@
+# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# This code was automatically generated across versions from 12.0.1 to 12.4.1. Do not modify it directly.
+
+from libc.stdint cimport intptr_t
+
+from .utils cimport get_nvJitLink_dso_version_suffix
+
+import os
+import site
+
+import win32api
+
+from .utils import FunctionNotFoundError, NotSupportedError
+
+
+###############################################################################
+# Wrapper init
+###############################################################################
+
+LOAD_LIBRARY_SEARCH_SYSTEM32     = 0x00000800
+LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000
+LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100
+cdef bint __py_nvJitLink_init = False
+cdef void* __cuDriverGetVersion = NULL
+
+# One slot per exported entry point; NULL until resolved (or if not exported).
+cdef void* __nvJitLinkCreate = NULL
+cdef void* __nvJitLinkDestroy = NULL
+cdef void* __nvJitLinkAddData = NULL
+cdef void* __nvJitLinkAddFile = NULL
+cdef void* __nvJitLinkComplete = NULL
+cdef void* __nvJitLinkGetLinkedCubinSize = NULL
+cdef void* __nvJitLinkGetLinkedCubin = NULL
+cdef void* __nvJitLinkGetLinkedPtxSize = NULL
+cdef void* __nvJitLinkGetLinkedPtx = NULL
+cdef void* __nvJitLinkGetErrorLogSize = NULL
+cdef void* __nvJitLinkGetErrorLog = NULL
+cdef void* __nvJitLinkGetInfoLogSize = NULL
+cdef void* __nvJitLinkGetInfoLog = NULL
+
+
+cdef inline list get_site_packages():
+    # User site-packages first, then the global ones.
+    return [site.getusersitepackages()] + site.getsitepackages()
+
+
+cdef load_library(const int driver_ver):
+    # Locate and load the nvJitLink DLL, trying each version suffix in turn.
+    # Returns the (non-zero) module handle; raises RuntimeError when no
+    # candidate could be loaded.
+    handle = 0
+
+    for suffix in get_nvJitLink_dso_version_suffix(driver_ver):
+        if len(suffix) == 0:
+            continue
+        dll_name = f"nvJitLink64_{suffix}.dll"
+
+        # First check if the DLL has been loaded by 3rd parties
+        try:
+            handle = win32api.GetModuleHandle(dll_name)
+        except Exception:
+            pass
+        else:
+            break
+
+        # Next, check if DLLs are installed via pip
+        for sp in get_site_packages():
+            mod_path = os.path.join(sp, "nvidia", "nvJitLink", "bin")
+            if not os.path.isdir(mod_path):
+                continue
+            os.add_dll_directory(mod_path)
+            try:
+                handle = win32api.LoadLibraryEx(
+                    # Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path...
+                    os.path.join(mod_path, dll_name),
+                    0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR)
+            except Exception:
+                continue
+            else:
+                break
+        # The inner `break` only leaves the site-packages loop, so stop the
+        # suffix search explicitly once a handle has been obtained.
+        if handle != 0:
+            break
+
+        # Finally, try default search
+        try:
+            handle = win32api.LoadLibrary(dll_name)
+        except Exception:
+            pass
+        else:
+            break
+    else:
+        raise RuntimeError('Failed to load nvJitLink')
+
+    assert handle != 0
+    return handle
+
+
+cdef void* _load_symbol(handle, str name):
+    # Resolve one exported symbol from `handle`.  A failed lookup yields NULL
+    # so the matching wrapper can raise FunctionNotFoundError on first use.
+    try:
+        return <void*><intptr_t>win32api.GetProcAddress(handle, name)
+    except Exception:
+        return NULL
+
+
+cdef int _check_or_init_nvJitLink() except -1 nogil:
+    # One-time initialisation: query the driver version, load the matching
+    # nvJitLink DLL and resolve every entry point.  Returns 0 on success.
+    global __py_nvJitLink_init
+    if __py_nvJitLink_init:
+        return 0
+
+    cdef int err, driver_ver
+    with gil:
+        # Load driver to check version
+        try:
+            handle = win32api.LoadLibraryEx("nvcuda.dll", 0, LOAD_LIBRARY_SEARCH_SYSTEM32)
+        except Exception as e:
+            raise NotSupportedError(f'CUDA driver is not found ({e})')
+        global __cuDriverGetVersion
+        if __cuDriverGetVersion == NULL:
+            __cuDriverGetVersion = <void*><intptr_t>win32api.GetProcAddress(handle, 'cuDriverGetVersion')
+            if __cuDriverGetVersion == NULL:
+                raise RuntimeError('something went wrong')
+        err = (<int (*)(int*) nogil>__cuDriverGetVersion)(&driver_ver)
+        if err != 0:
+            raise RuntimeError('something went wrong')
+
+        # Load library
+        handle = load_library(driver_ver)
+
+        # Load function
+        global __nvJitLinkCreate
+        __nvJitLinkCreate = _load_symbol(handle, 'nvJitLinkCreate')
+
+        global __nvJitLinkDestroy
+        __nvJitLinkDestroy = _load_symbol(handle, 'nvJitLinkDestroy')
+
+        global __nvJitLinkAddData
+        __nvJitLinkAddData = _load_symbol(handle, 'nvJitLinkAddData')
+
+        global __nvJitLinkAddFile
+        __nvJitLinkAddFile = _load_symbol(handle, 'nvJitLinkAddFile')
+
+        global __nvJitLinkComplete
+        __nvJitLinkComplete = _load_symbol(handle, 'nvJitLinkComplete')
+
+        global __nvJitLinkGetLinkedCubinSize
+        __nvJitLinkGetLinkedCubinSize = _load_symbol(handle, 'nvJitLinkGetLinkedCubinSize')
+
+        global __nvJitLinkGetLinkedCubin
+        __nvJitLinkGetLinkedCubin = _load_symbol(handle, 'nvJitLinkGetLinkedCubin')
+
+        global __nvJitLinkGetLinkedPtxSize
+        __nvJitLinkGetLinkedPtxSize = _load_symbol(handle, 'nvJitLinkGetLinkedPtxSize')
+
+        global __nvJitLinkGetLinkedPtx
+        __nvJitLinkGetLinkedPtx = _load_symbol(handle, 'nvJitLinkGetLinkedPtx')
+
+        global __nvJitLinkGetErrorLogSize
+        __nvJitLinkGetErrorLogSize = _load_symbol(handle, 'nvJitLinkGetErrorLogSize')
+
+        global __nvJitLinkGetErrorLog
+        __nvJitLinkGetErrorLog = _load_symbol(handle, 'nvJitLinkGetErrorLog')
+
+        global __nvJitLinkGetInfoLogSize
+        __nvJitLinkGetInfoLogSize = _load_symbol(handle, 'nvJitLinkGetInfoLogSize')
+
+        global __nvJitLinkGetInfoLog
+        __nvJitLinkGetInfoLog = _load_symbol(handle, 'nvJitLinkGetInfoLog')
+
+    __py_nvJitLink_init = True
+    return 0
+
+
+cdef dict func_ptrs = None
+
+
+cpdef dict _inspect_function_pointers():
+    # Return (and cache) a dict mapping each slot name to its resolved address
+    # as a Python int; 0 means the symbol was not found.
+    global func_ptrs
+    if func_ptrs is not None:
+        return func_ptrs
+
+    _check_or_init_nvJitLink()
+    cdef dict data = {}
+
+    global __nvJitLinkCreate
+    data["__nvJitLinkCreate"] = <intptr_t>__nvJitLinkCreate
+
+    global __nvJitLinkDestroy
+    data["__nvJitLinkDestroy"] = <intptr_t>__nvJitLinkDestroy
+
+    global __nvJitLinkAddData
+    data["__nvJitLinkAddData"] = <intptr_t>__nvJitLinkAddData
+
+    global __nvJitLinkAddFile
+    data["__nvJitLinkAddFile"] = <intptr_t>__nvJitLinkAddFile
+
+    global __nvJitLinkComplete
+    data["__nvJitLinkComplete"] = <intptr_t>__nvJitLinkComplete
+
+    global __nvJitLinkGetLinkedCubinSize
+    data["__nvJitLinkGetLinkedCubinSize"] = <intptr_t>__nvJitLinkGetLinkedCubinSize
+
+    global __nvJitLinkGetLinkedCubin
+    data["__nvJitLinkGetLinkedCubin"] = <intptr_t>__nvJitLinkGetLinkedCubin
+
+    global __nvJitLinkGetLinkedPtxSize
+    data["__nvJitLinkGetLinkedPtxSize"] = <intptr_t>__nvJitLinkGetLinkedPtxSize
+
+    global __nvJitLinkGetLinkedPtx
+    data["__nvJitLinkGetLinkedPtx"] = <intptr_t>__nvJitLinkGetLinkedPtx
+
+    global __nvJitLinkGetErrorLogSize
+    data["__nvJitLinkGetErrorLogSize"] = <intptr_t>__nvJitLinkGetErrorLogSize
+
+    global __nvJitLinkGetErrorLog
+    data["__nvJitLinkGetErrorLog"] = <intptr_t>__nvJitLinkGetErrorLog
+
+    global __nvJitLinkGetInfoLogSize
+    data["__nvJitLinkGetInfoLogSize"] = <intptr_t>__nvJitLinkGetInfoLogSize
+
+    global __nvJitLinkGetInfoLog
+    data["__nvJitLinkGetInfoLog"] = <intptr_t>__nvJitLinkGetInfoLog
+
+    func_ptrs = data
+    return data
+
+
+cpdef _inspect_function_pointer(str name):
+    # Look up a single slot by name; raises KeyError for unknown names.
+    global func_ptrs
+    if func_ptrs is None:
+        func_ptrs = _inspect_function_pointers()
+    return func_ptrs[name]
+
+
+###############################################################################
+# Wrapper functions
+###############################################################################
+
+# Each wrapper initialises the library on first use, raises
+# FunctionNotFoundError when its symbol is missing, and otherwise casts the
+# stored void* to the C signature and forwards the arguments unchanged.
+
+cdef nvJitLinkResult _nvJitLinkCreate(nvJitLinkHandle* handle, uint32_t numOptions, const char** options) except* nogil:
+    global __nvJitLinkCreate
+    _check_or_init_nvJitLink()
+    if __nvJitLinkCreate == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkCreate is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle*, uint32_t, const char**) nogil>__nvJitLinkCreate)(
+        handle, numOptions, options)
+
+
+cdef nvJitLinkResult _nvJitLinkDestroy(nvJitLinkHandle* handle) except* nogil:
+    global __nvJitLinkDestroy
+    _check_or_init_nvJitLink()
+    if __nvJitLinkDestroy == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkDestroy is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle*) nogil>__nvJitLinkDestroy)(
+        handle)
+
+
+cdef nvJitLinkResult _nvJitLinkAddData(nvJitLinkHandle handle, nvJitLinkInputType inputType, const void* data, size_t size, const char* name) except* nogil:
+    global __nvJitLinkAddData
+    _check_or_init_nvJitLink()
+    if __nvJitLinkAddData == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkAddData is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle, nvJitLinkInputType, const void*, size_t, const char*) nogil>__nvJitLinkAddData)(
+        handle, inputType, data, size, name)
+
+
+cdef nvJitLinkResult _nvJitLinkAddFile(nvJitLinkHandle handle, nvJitLinkInputType inputType, const char* fileName) except* nogil:
+    global __nvJitLinkAddFile
+    _check_or_init_nvJitLink()
+    if __nvJitLinkAddFile == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkAddFile is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle, nvJitLinkInputType, const char*) nogil>__nvJitLinkAddFile)(
+        handle, inputType, fileName)
+
+
+cdef nvJitLinkResult _nvJitLinkComplete(nvJitLinkHandle handle) except* nogil:
+    global __nvJitLinkComplete
+    _check_or_init_nvJitLink()
+    if __nvJitLinkComplete == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkComplete is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle) nogil>__nvJitLinkComplete)(
+        handle)
+
+
+cdef nvJitLinkResult _nvJitLinkGetLinkedCubinSize(nvJitLinkHandle handle, size_t* size) except* nogil:
+    global __nvJitLinkGetLinkedCubinSize
+    _check_or_init_nvJitLink()
+    if __nvJitLinkGetLinkedCubinSize == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkGetLinkedCubinSize is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle, size_t*) nogil>__nvJitLinkGetLinkedCubinSize)(
+        handle, size)
+
+
+cdef nvJitLinkResult _nvJitLinkGetLinkedCubin(nvJitLinkHandle handle, void* cubin) except* nogil:
+    global __nvJitLinkGetLinkedCubin
+    _check_or_init_nvJitLink()
+    if __nvJitLinkGetLinkedCubin == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkGetLinkedCubin is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle, void*) nogil>__nvJitLinkGetLinkedCubin)(
+        handle, cubin)
+
+
+cdef nvJitLinkResult _nvJitLinkGetLinkedPtxSize(nvJitLinkHandle handle, size_t* size) except* nogil:
+    global __nvJitLinkGetLinkedPtxSize
+    _check_or_init_nvJitLink()
+    if __nvJitLinkGetLinkedPtxSize == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkGetLinkedPtxSize is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle, size_t*) nogil>__nvJitLinkGetLinkedPtxSize)(
+        handle, size)
+
+
+cdef nvJitLinkResult _nvJitLinkGetLinkedPtx(nvJitLinkHandle handle, char* ptx) except* nogil:
+    global __nvJitLinkGetLinkedPtx
+    _check_or_init_nvJitLink()
+    if __nvJitLinkGetLinkedPtx == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkGetLinkedPtx is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle, char*) nogil>__nvJitLinkGetLinkedPtx)(
+        handle, ptx)
+
+
+# As with the wrappers above: initialise on first use, fail with
+# FunctionNotFoundError when the symbol is missing, otherwise cast the stored
+# void* to its C signature and forward the arguments.
+cdef nvJitLinkResult _nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t* size) except* nogil:
+    global __nvJitLinkGetErrorLogSize
+    _check_or_init_nvJitLink()
+    if __nvJitLinkGetErrorLogSize == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkGetErrorLogSize is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle, size_t*) nogil>__nvJitLinkGetErrorLogSize)(
+        handle, size)
+
+
+cdef nvJitLinkResult _nvJitLinkGetErrorLog(nvJitLinkHandle handle, char* log) except* nogil:
+    global __nvJitLinkGetErrorLog
+    _check_or_init_nvJitLink()
+    if __nvJitLinkGetErrorLog == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkGetErrorLog is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle, char*) nogil>__nvJitLinkGetErrorLog)(
+        handle, log)
+
+
+cdef nvJitLinkResult _nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t* size) except* nogil:
+    global __nvJitLinkGetInfoLogSize
+    _check_or_init_nvJitLink()
+    if __nvJitLinkGetInfoLogSize == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkGetInfoLogSize is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle, size_t*) nogil>__nvJitLinkGetInfoLogSize)(
+        handle, size)
+
+
+cdef nvJitLinkResult _nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) except* nogil:
+    global __nvJitLinkGetInfoLog
+    _check_or_init_nvJitLink()
+    if __nvJitLinkGetInfoLog == NULL:
+        with gil:
+            raise FunctionNotFoundError("function nvJitLinkGetInfoLog is not found")
+    return (<nvJitLinkResult (*)(nvJitLinkHandle, char*) nogil>__nvJitLinkGetInfoLog)(
+        handle, log)
diff --git a/cuda_bindings/cynvJitLink.pxd b/cuda_bindings/cynvJitLink.pxd
new file mode 100644
index 000000000..ed440c0b3
--- /dev/null
+++ b/cuda_bindings/cynvJitLink.pxd
@@ -0,0 +1,48 @@
+# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# This code was automatically generated across versions from 12.0.1 to 12.4.1. Do not modify it directly.
+
+
+# uint32_t is required by the nvJitLinkCreate declaration below.
+from libc.stdint cimport int64_t, uint32_t
+
+
+###############################################################################
+# Types (structs, enums, ...)
+###############################################################################
+
+# enums
+
+
+
+# types
+# NOTE(review): the header names were missing from these #include lines; the
+# three below are the CUDA headers presumed to be intended (cudaStream_t comes
+# from driver_types.h) -- confirm against the generator template.
+cdef extern from *:
+    """
+    #include <driver_types.h>
+    #include <library_types.h>
+    #include <cuComplex.h>
+    """
+    ctypedef void* cudaStream_t 'cudaStream_t'
+
+
+
+
+
+###############################################################################
+# Functions
+###############################################################################
+
+# NOTE(review): nvJitLinkResult, nvJitLinkHandle and nvJitLinkInputType are
+# used below but not declared in this file -- TODO confirm where they come from.
+cdef nvJitLinkResult nvJitLinkCreate(nvJitLinkHandle* handle, uint32_t numOptions, const char** options) except* nogil
+cdef nvJitLinkResult nvJitLinkDestroy(nvJitLinkHandle* handle) except* nogil
+cdef nvJitLinkResult nvJitLinkAddData(nvJitLinkHandle handle, nvJitLinkInputType inputType, const void* data, size_t size, const char* name) except* nogil
+cdef nvJitLinkResult nvJitLinkAddFile(nvJitLinkHandle handle, nvJitLinkInputType inputType, const char* fileName) except* nogil
+cdef nvJitLinkResult nvJitLinkComplete(nvJitLinkHandle handle) except* nogil
+cdef nvJitLinkResult nvJitLinkGetLinkedCubinSize(nvJitLinkHandle handle, size_t* size) except* nogil
+cdef nvJitLinkResult nvJitLinkGetLinkedCubin(nvJitLinkHandle handle, void* cubin) except* nogil
+cdef nvJitLinkResult nvJitLinkGetLinkedPtxSize(nvJitLinkHandle handle, size_t* size) except* nogil
+cdef nvJitLinkResult nvJitLinkGetLinkedPtx(nvJitLinkHandle handle, char* ptx) except* nogil
+cdef nvJitLinkResult nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t* size) except* nogil
+cdef nvJitLinkResult nvJitLinkGetErrorLog(nvJitLinkHandle handle, char* log) except* nogil
+cdef nvJitLinkResult nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t* size) except* nogil
+cdef nvJitLinkResult nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) except* nogil
diff --git a/cuda_bindings/cynvJitLink.pyx b/cuda_bindings/cynvJitLink.pyx
new file mode 100644
index 000000000..65d3f9840
--- /dev/null
+++ b/cuda_bindings/cynvJitLink.pyx
@@ -0,0 +1,63 @@
+# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# This code was automatically generated across versions from 12.0.1 to 12.4.1. Do not modify it directly.
+
+# NOTE(review): the loader modules in this patch are added under `_bindings`,
+# not `_internal` -- confirm this package path resolves.
+from ._internal cimport nvJitLink as _nvJitLink
+
+
+###############################################################################
+# Wrapper functions
+###############################################################################
+
+# Thin pass-throughs to the dynamically loaded implementations.
+
+cdef nvJitLinkResult nvJitLinkCreate(nvJitLinkHandle* handle, uint32_t numOptions, const char** options) except* nogil:
+    return _nvJitLink._nvJitLinkCreate(handle, numOptions, options)
+
+
+cdef nvJitLinkResult nvJitLinkDestroy(nvJitLinkHandle* handle) except* nogil:
+    return _nvJitLink._nvJitLinkDestroy(handle)
+
+
+cdef nvJitLinkResult nvJitLinkAddData(nvJitLinkHandle handle, nvJitLinkInputType inputType, const void* data, size_t size, const char* name) except* nogil:
+    return _nvJitLink._nvJitLinkAddData(handle, inputType, data, size, name)
+
+
+cdef nvJitLinkResult nvJitLinkAddFile(nvJitLinkHandle handle, nvJitLinkInputType inputType, const char* fileName) except* nogil:
+    return _nvJitLink._nvJitLinkAddFile(handle, inputType, fileName)
+
+
+cdef nvJitLinkResult nvJitLinkComplete(nvJitLinkHandle handle) except* nogil:
+    return _nvJitLink._nvJitLinkComplete(handle)
+
+
+cdef nvJitLinkResult nvJitLinkGetLinkedCubinSize(nvJitLinkHandle handle, size_t* size) except* nogil:
+    return _nvJitLink._nvJitLinkGetLinkedCubinSize(handle, size)
+
+
+cdef nvJitLinkResult nvJitLinkGetLinkedCubin(nvJitLinkHandle handle, void* cubin) except* nogil:
+    return _nvJitLink._nvJitLinkGetLinkedCubin(handle, cubin)
+
+
+cdef nvJitLinkResult nvJitLinkGetLinkedPtxSize(nvJitLinkHandle handle, size_t* size) except* nogil:
+    return _nvJitLink._nvJitLinkGetLinkedPtxSize(handle, size)
+
+
+cdef nvJitLinkResult nvJitLinkGetLinkedPtx(nvJitLinkHandle handle, char* ptx) except* nogil:
+    return _nvJitLink._nvJitLinkGetLinkedPtx(handle, ptx)
+
+
+cdef nvJitLinkResult nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t* size) except* nogil:
+    return _nvJitLink._nvJitLinkGetErrorLogSize(handle, size)
+
+
+cdef nvJitLinkResult nvJitLinkGetErrorLog(nvJitLinkHandle handle, char* log) except* nogil:
+    return _nvJitLink._nvJitLinkGetErrorLog(handle, log)
+
+
+cdef nvJitLinkResult nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t* size) except* nogil:
+    return _nvJitLink._nvJitLinkGetInfoLogSize(handle, size)
+
+
+cdef nvJitLinkResult nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) except* nogil:
+    return _nvJitLink._nvJitLinkGetInfoLog(handle, log)
diff --git a/cuda_bindings/nvJitLink.pxd b/cuda_bindings/nvJitLink.pxd
new file mode 100644
index 000000000..d063002be
--- /dev/null
+++ b/cuda_bindings/nvJitLink.pxd
@@ -0,0 +1,46 @@
+# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# This code was automatically generated across versions from 12.0.1 to 12.4.1. Do not modify it directly.
+
+# uint32_t is needed by the `create` declaration below.
+from libc.stdint cimport intptr_t, uint32_t
+
+from .cynvJitLink cimport *
+
+
+###############################################################################
+# Types
+###############################################################################
+
+
+
+# NOTE(review): cudaDataType and libraryPropertyType_t are not declared by
+# cynvJitLink.pxd as it appears in this patch -- confirm these typedefs resolve.
+ctypedef cudaStream_t Stream
+ctypedef cudaDataType DataType
+ctypedef libraryPropertyType_t LibraryPropertyType
+
+
+###############################################################################
+# Enum
+###############################################################################
+
+
+
+
+###############################################################################
+# Functions
+###############################################################################
+
+# Pointer-like arguments are passed as intptr_t addresses.
+cpdef create(intptr_t handle, uint32_t num_options, intptr_t options)
+cpdef destroy(intptr_t handle)
+cpdef add_data(nvJitLinkHandle handle, nvJitLinkInputType input_type, intptr_t data, size_t size, intptr_t name)
+cpdef add_file(nvJitLinkHandle handle, nvJitLinkInputType input_type, intptr_t file_name)
+cpdef complete(nvJitLinkHandle handle)
+cpdef get_linked_cubin_size(nvJitLinkHandle handle, intptr_t size)
+cpdef get_linked_cubin(nvJitLinkHandle handle, intptr_t cubin)
+cpdef get_linked_ptx_size(nvJitLinkHandle handle, intptr_t size)
+cpdef get_linked_ptx(nvJitLinkHandle handle, intptr_t ptx)
+cpdef get_error_log_size(nvJitLinkHandle handle, intptr_t size)
+cpdef get_error_log(nvJitLinkHandle handle, intptr_t log)
+cpdef get_info_log_size(nvJitLinkHandle handle, intptr_t size)
+cpdef get_info_log(nvJitLinkHandle handle, intptr_t log)
diff --git a/cuda_bindings/nvJitLink.pyx b/cuda_bindings/nvJitLink.pyx
new file mode 100644
index 000000000..18f4c7545
--- /dev/null
+++ b/cuda_bindings/nvJitLink.pyx
@@ -0,0 +1,138 @@
+# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# This code was automatically generated across versions from 12.0.1 to 12.4.1. Do not modify it directly.
+
+cimport cython  # NOQA
+
+from enum import IntEnum as _IntEnum
+
+
+###############################################################################
+# Enum
+###############################################################################
+
+
+
+
+###############################################################################
+# Error handling
+###############################################################################
+
+# Maps each nvJitLinkResult code to its symbolic name for error messages.
+cdef dict STATUS={
+    NVJITLINK_SUCCESS                   : 'NVJITLINK_SUCCESS',
+    NVJITLINK_ERROR_UNRECOGNIZED_OPTION : 'NVJITLINK_ERROR_UNRECOGNIZED_OPTION',
+    NVJITLINK_ERROR_MISSING_ARCH        : 'NVJITLINK_ERROR_MISSING_ARCH',  # -arch=sm_NN option not specified
+    NVJITLINK_ERROR_INVALID_INPUT       : 'NVJITLINK_ERROR_INVALID_INPUT',
+    NVJITLINK_ERROR_PTX_COMPILE         : 'NVJITLINK_ERROR_PTX_COMPILE',
+    NVJITLINK_ERROR_NVVM_COMPILE        : 'NVJITLINK_ERROR_NVVM_COMPILE',
+    NVJITLINK_ERROR_INTERNAL            : 'NVJITLINK_ERROR_INTERNAL',
+    NVJITLINK_ERROR_THREADPOOL          : 'NVJITLINK_ERROR_THREADPOOL',
+    NVJITLINK_ERROR_UNRECOGNIZED_INPUT  : 'NVJITLINK_ERROR_UNRECOGNIZED_INPUT',
+    NVJITLINK_ERROR_NULL_INPUT          : 'NVJITLINK_ERROR_NULL_INPUT',
+    NVJITLINK_ERROR_INCOMPATIBLE_OPTIONS: 'NVJITLINK_ERROR_INCOMPATIBLE_OPTIONS',
+    NVJITLINK_ERROR_INCORRECT_INPUT_TYPE: 'NVJITLINK_ERROR_INCORRECT_INPUT_TYPE',
+    NVJITLINK_ERROR_ARCH_MISMATCH       : 'NVJITLINK_ERROR_ARCH_MISMATCH',
+    NVJITLINK_ERROR_OUTDATED_LIBRARY    : 'NVJITLINK_ERROR_OUTDATED_LIBRARY',
+    NVJITLINK_ERROR_MISSING_FATBIN      : 'NVJITLINK_ERROR_MISSING_FATBIN'
+}
+
+class nvJitLinkError(Exception):
+    """Raised when an nvJitLink call returns a non-zero status.
+
+    The integer status is kept on ``self.status``; the exception message is
+    the symbolic name from ``STATUS`` when the code is known.
+    """
+
+    def __init__(self, status):
+        self.status = status
+        # Fall back to a generic message instead of failing with KeyError
+        # when the library reports a code that STATUS does not list.
+        err = STATUS.get(status, f"unknown nvJitLink status ({status})")
+        super(nvJitLinkError, self).__init__(err)
+
+    def __reduce__(self):
+        # Rebuild from the status code alone so the exception can be pickled.
+        return (type(self), (self.status,))
+
+
+@cython.profile(False)
+cdef inline void check_status(int status) except* nogil:
+    # Raise nvJitLinkError for any non-zero status.  The `except*` clause is
+    # what lets the exception propagate out of this void function.
+    if status != 0:
+        with gil:
+            raise nvJitLinkError(status)
+
+
+@cython.profile(False)
+cdef inline void _check_status(int status) except* nogil:
+    # Underscore-prefixed alias of check_status; both spellings are accepted.
+    check_status(status)
+
+
+###############################################################################
+# Wrapper functions
+###############################################################################
+
+# Pointer-like arguments arrive as intptr_t addresses and are cast back to
+# the C pointer types expected by the cynvJitLink functions.  Every wrapper
+# releases the GIL for the call and raises nvJitLinkError on a non-zero status.
+
+cpdef create(intptr_t handle, uint32_t num_options, intptr_t options):
+    with nogil:
+        status = nvJitLinkCreate(<nvJitLinkHandle*>handle, num_options, <const char**>options)
+    check_status(status)
+
+
+cpdef destroy(intptr_t handle):
+    with nogil:
+        status = nvJitLinkDestroy(<nvJitLinkHandle*>handle)
+    check_status(status)
+
+
+cpdef add_data(nvJitLinkHandle handle, nvJitLinkInputType input_type, intptr_t data, size_t size, intptr_t name):
+    with nogil:
+        status = nvJitLinkAddData(handle, input_type, <const void*>data, size, <const char*>name)
+    check_status(status)
+
+
+cpdef add_file(nvJitLinkHandle handle, nvJitLinkInputType input_type, intptr_t file_name):
+    with nogil:
+        status = nvJitLinkAddFile(handle, input_type, <const char*>file_name)
+    check_status(status)
+
+
+cpdef complete(nvJitLinkHandle handle):
+    with nogil:
+        status = nvJitLinkComplete(handle)
+    check_status(status)
+
+
+cpdef get_linked_cubin_size(nvJitLinkHandle handle, intptr_t size):
+    with nogil:
+        status = nvJitLinkGetLinkedCubinSize(handle, <size_t*>size)
+    check_status(status)
+
+
+cpdef get_linked_cubin(nvJitLinkHandle handle, intptr_t cubin):
+    with nogil:
+        status = nvJitLinkGetLinkedCubin(handle, <void*>cubin)
+    check_status(status)
+
+
+cpdef get_linked_ptx_size(nvJitLinkHandle handle, intptr_t size):
+    with nogil:
+        status = nvJitLinkGetLinkedPtxSize(handle, <size_t*>size)
+    check_status(status)
+
+
+cpdef get_linked_ptx(nvJitLinkHandle handle, intptr_t ptx):
+    with nogil:
+        status = nvJitLinkGetLinkedPtx(handle, <char*>ptx)
+    check_status(status)
+
+
+cpdef get_error_log_size(nvJitLinkHandle handle, intptr_t size):
+    with nogil:
+        status = nvJitLinkGetErrorLogSize(handle, <size_t*>size)
+    check_status(status)
+
+
+cpdef get_error_log(nvJitLinkHandle handle, intptr_t log):
+    with nogil:
+        status = nvJitLinkGetErrorLog(handle, <char*>log)
+    check_status(status)
+
+
+cpdef get_info_log_size(nvJitLinkHandle handle, intptr_t size):
+    with nogil:
+        status = nvJitLinkGetInfoLogSize(handle, <size_t*>size)
+    check_status(status)
+
+
+cpdef get_info_log(nvJitLinkHandle handle, intptr_t log):
+    with nogil:
+        status = nvJitLinkGetInfoLog(handle, <char*>log)
+    check_status(status)
diff --git a/cuda_bindings/setup.py b/cuda_bindings/setup.py
index fb9d7b953..27b83f946 100644
--- a/cuda_bindings/setup.py
+++ b/cuda_bindings/setup.py
@@ -57,7 +57,8 @@
                  'cuda_egl_interop.h',
                  'cuda_gl_interop.h',
                  'cuda_vdpau_interop.h'],
-    'nvrtc' : ['nvrtc.h']}
+    'nvrtc' : ['nvrtc.h'],
+    'nvJitLink' : ['nvJitLink.h'],}
 
 replace = {' __device_builtin__ ':' ',
            'CUDARTAPI ':' ',
diff --git a/cuda_bindings/tests/test_nvJitLink.py b/cuda_bindings/tests/test_nvJitLink.py
new file mode 100644
index 000000000..7ced5ff38
--- /dev/null
+++ b/cuda_bindings/tests/test_nvJitLink.py
@@ -0,0 +1,3 @@
+import pytest
+from cuda import nvJitLink
+

From e893cd2fee43f4c9abb9311ec076c8356d3157bd Mon Sep 17 00:00:00 2001
From: ksimpson
Date: Tue, 15 Oct 2024 10:11:07 -0700
Subject: [PATCH 04/34] add test file

---
 cuda_bindings/tests/test_nvJitLink.py | 161 ++++++++++++++++++++++++++
 1 file changed, 161 insertions(+)

diff --git a/cuda_bindings/tests/test_nvJitLink.py b/cuda_bindings/tests/test_nvJitLink.py
index 7ced5ff38..f566ae7c6 100644
--- a/cuda_bindings/tests/test_nvJitLink.py
+++ b/cuda_bindings/tests/test_nvJitLink.py
@@ -1,3 +1,164 @@
 import pytest
 from cuda import nvJitLink
 
+def test_create_no_arch_error():
+    # nvjitlink expects at least the architecture to be specified.
+    with pytest.raises(RuntimeError, match="NVJITLINK_ERROR_MISSING_ARCH error"):
+        nvJitLink.create()
+
+
+def test_invalid_arch_error():
+    # sm_XX is not a valid architecture
+    with pytest.raises(RuntimeError, match="NVJITLINK_ERROR_UNRECOGNIZED_OPTION error"):
+        nvJitLink.create("-arch=sm_XX")
+
+
+def test_unrecognized_option_error():
+    with pytest.raises(RuntimeError, match="NVJITLINK_ERROR_UNRECOGNIZED_OPTION error"):
+        nvJitLink.create("-fictitious_option")
+
+
+def test_invalid_option_type_error():
+    with pytest.raises(TypeError, match="Expecting only strings"):
+        nvJitLink.create("-arch", 53)
+
+
+def test_create_and_destroy():
+    handle = nvJitLink.create("-arch=sm_53")
+    assert handle != 0
+    nvJitLink.destroy(handle)
+
+
+def test_complete_empty():
+    handle = nvJitLink.create("-arch=sm_75")
+    nvJitLink.complete(handle)
+    nvJitLink.destroy(handle)
+
+
+# InputType is always reached through the imported nvJitLink module; the bare
+# name `InputType` is not defined in this file.
+@pytest.mark.parametrize(
+    "input_file,input_type",
+    [
+        ("device_functions_cubin", nvJitLink.InputType.CUBIN),
+        ("device_functions_fatbin", nvJitLink.InputType.FATBIN),
+        ("device_functions_ptx", nvJitLink.InputType.PTX),
+        ("device_functions_object", nvJitLink.InputType.OBJECT),
+        ("device_functions_archive", nvJitLink.InputType.LIBRARY),
+    ],
+)
+def test_add_file(input_file, input_type, gpu_arch_flag, request):
+    filename, data = request.getfixturevalue(input_file)
+
+    handle = nvJitLink.create(gpu_arch_flag)
+    nvJitLink.add_data(handle, input_type.value, data, filename)
+    nvJitLink.destroy(handle)
+
+
+# We test the LTO input case separately as it requires the `-lto` flag. The
+# OBJECT input type is used because the LTO-IR container is packaged in an ELF
+# object when produced by NVCC.
+def test_add_file_lto(device_functions_ltoir_object, gpu_arch_flag):
+    filename, data = device_functions_ltoir_object
+
+    handle = nvJitLink.create(gpu_arch_flag, "-lto")
+    nvJitLink.add_data(handle, nvJitLink.InputType.OBJECT.value, data, filename)
+    nvJitLink.destroy(handle)
+
+
+def test_get_error_log(undefined_extern_cubin, gpu_arch_flag):
+    handle = nvJitLink.create(gpu_arch_flag)
+    filename, data = undefined_extern_cubin
+    input_type = nvJitLink.InputType.CUBIN.value
+    nvJitLink.add_data(handle, input_type, data, filename)
+    with pytest.raises(RuntimeError):
+        nvJitLink.complete(handle)
+    error_log = nvJitLink.get_error_log(handle)
+    nvJitLink.destroy(handle)
+    assert (
+        "Undefined reference to '_Z5undefff' "
+        "in 'undefined_extern.cubin'" in error_log
+    )
+
+
+def test_get_info_log(device_functions_cubin, gpu_arch_flag):
+    handle = nvJitLink.create(gpu_arch_flag)
+    filename, data = device_functions_cubin
+    input_type = nvJitLink.InputType.CUBIN.value
+    nvJitLink.add_data(handle, input_type, data, filename)
+    nvJitLink.complete(handle)
+    info_log = nvJitLink.get_info_log(handle)
+    nvJitLink.destroy(handle)
+    # Info log is empty
+    assert "" == info_log
+
+
+def test_get_linked_cubin(device_functions_cubin, gpu_arch_flag):
+    handle = nvJitLink.create(gpu_arch_flag)
+    filename, data = device_functions_cubin
+    input_type = nvJitLink.InputType.CUBIN.value
+    nvJitLink.add_data(handle, input_type, data, filename)
+    nvJitLink.complete(handle)
+    cubin = nvJitLink.get_linked_cubin(handle)
+    nvJitLink.destroy(handle)
+
+    # Just check we got something that looks like an ELF
+    assert cubin[:4] == b"\x7fELF"
+
+
+def test_get_linked_cubin_link_not_complete_error(
+    device_functions_cubin, gpu_arch_flag
+):
+    handle = nvJitLink.create(gpu_arch_flag)
+    filename, data = device_functions_cubin
+    input_type = nvJitLink.InputType.CUBIN.value
+    nvJitLink.add_data(handle, input_type, data, filename)
+    with pytest.raises(RuntimeError, match="NVJITLINK_ERROR_INTERNAL error"):
+        nvJitLink.get_linked_cubin(handle)
+    nvJitLink.destroy(handle)
+
+
+def test_get_linked_cubin_from_lto(device_functions_ltoir_object, gpu_arch_flag):
+    filename, data = device_functions_ltoir_object
+    # device_functions_ltoir_object is a host object containing a fatbin
+    # containing an LTOIR container, because that is what NVCC produces when
+    # LTO is requested. So we need to use the OBJECT input type, and the linker
+    # retrieves the LTO IR from it because we passed the -lto flag.
+    input_type = nvJitLink.InputType.OBJECT.value
+    handle = nvJitLink.create(gpu_arch_flag, "-lto")
+    nvJitLink.add_data(handle, input_type, data, filename)
+    nvJitLink.complete(handle)
+    cubin = nvJitLink.get_linked_cubin(handle)
+    nvJitLink.destroy(handle)
+
+    # Just check we got something that looks like an ELF
+    assert cubin[:4] == b"\x7fELF"
+
+
+def test_get_linked_ptx_from_lto(device_functions_ltoir_object, gpu_arch_flag):
+    filename, data = device_functions_ltoir_object
+    # device_functions_ltoir_object is a host object containing a fatbin
+    # containing an LTOIR container, because that is what NVCC produces when
+    # LTO is requested. So we need to use the OBJECT input type, and the linker
+    # retrieves the LTO IR from it because we passed the -lto flag.
+    input_type = nvJitLink.InputType.OBJECT.value
+    handle = nvJitLink.create(gpu_arch_flag, "-lto", "-ptx")
+    nvJitLink.add_data(handle, input_type, data, filename)
+    nvJitLink.complete(handle)
+    nvJitLink.get_linked_ptx(handle)
+    nvJitLink.destroy(handle)
+
+
+def test_get_linked_ptx_link_not_complete_error(
+    device_functions_ltoir_object, gpu_arch_flag
+):
+    handle = nvJitLink.create(gpu_arch_flag, "-lto", "-ptx")
+    filename, data = device_functions_ltoir_object
+    input_type = nvJitLink.InputType.OBJECT.value
+    nvJitLink.add_data(handle, input_type, data, filename)
+    with pytest.raises(RuntimeError, match="NVJITLINK_ERROR_INTERNAL error"):
+        nvJitLink.get_linked_ptx(handle)
+    nvJitLink.destroy(handle)
+
+
+def test_package_version():
+    # pynvjitlink is never imported at module level; import it here and skip
+    # the test when the package is not installed instead of hitting NameError.
+    pynvjitlink = pytest.importorskip("pynvjitlink")
+    assert pynvjitlink.__version__ is not None
+    assert len(str(pynvjitlink.__version__)) > 0
\ No newline at end of file

From 5d60eb1e36831156bd5d0b2d636571b2f82e638b Mon Sep 17 00:00:00 2001
From: ksimpson
Date: Wed, 16 Oct 2024 15:45:29 -0700
Subject: [PATCH 05/34] more changes

---
 .../cuda/bindings/_internal/__init__.py       |   0
 .../nvJitLink.pxd => _internal/nvjitlink.pxd} |   4 +-
 .../nvjitlink.pyx}                            |  48 +--
 .../bindings/_internal/nvjitlink_linux.pyx    | 382 ++++++++++++++++++
 .../nvjitlink_windows.pyx}                    |  50 +--
 .../cuda/bindings/_internal/utils.pxd         | 172 ++++++++
 .../cuda/bindings/_internal/utils.pyx         | 139 +++++++
 .../bindings/cynvjitlink.pxd}                 |  29 +-
 .../bindings/cynvjitlink.pyx}                 |  30 +-
 cuda_bindings/cuda/bindings/nvjitlink.pxd     |  43 ++
 cuda_bindings/cuda/bindings/nvjitlink.pyx     | 153 +++++++
 cuda_bindings/nvJitLink.pxd                   |  46 ---
 cuda_bindings/nvJitLink.pyx                   | 138 -------
 cuda_bindings/setup.py                        |  64 ++-
 .../{test_nvJitLink.py => test_nvjitlink.py}  | 102 ++---
 15 files changed, 1086 insertions(+), 314 deletions(-)
 create mode 100644 cuda_bindings/cuda/bindings/_internal/__init__.py
 rename cuda_bindings/cuda/bindings/{_bindings/nvJitLink.pxd => _internal/nvjitlink.pxd} (95%)
 rename cuda_bindings/cuda/bindings/{_bindings/nvJitLink_linux.pyx =>
_internal/nvjitlink.pyx} (93%) create mode 100644 cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx rename cuda_bindings/cuda/bindings/{_bindings/nvJitLink_windows.pyx => _internal/nvjitlink_windows.pyx} (93%) create mode 100644 cuda_bindings/cuda/bindings/_internal/utils.pxd create mode 100644 cuda_bindings/cuda/bindings/_internal/utils.pyx rename cuda_bindings/{cynvJitLink.pxd => cuda/bindings/cynvjitlink.pxd} (60%) rename cuda_bindings/{cynvJitLink.pyx => cuda/bindings/cynvjitlink.pyx} (66%) create mode 100644 cuda_bindings/cuda/bindings/nvjitlink.pxd create mode 100644 cuda_bindings/cuda/bindings/nvjitlink.pyx delete mode 100644 cuda_bindings/nvJitLink.pxd delete mode 100644 cuda_bindings/nvJitLink.pyx rename cuda_bindings/tests/{test_nvJitLink.py => test_nvjitlink.py} (62%) diff --git a/cuda_bindings/cuda/bindings/_internal/__init__.py b/cuda_bindings/cuda/bindings/_internal/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/cuda_bindings/cuda/bindings/_bindings/nvJitLink.pxd b/cuda_bindings/cuda/bindings/_internal/nvjitlink.pxd similarity index 95% rename from cuda_bindings/cuda/bindings/_bindings/nvJitLink.pxd rename to cuda_bindings/cuda/bindings/_internal/nvjitlink.pxd index dca128a0e..ac3a9023b 100644 --- a/cuda_bindings/cuda/bindings/_bindings/nvJitLink.pxd +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink.pxd @@ -2,9 +2,9 @@ # # SPDX-License-Identifier: Apache-2.0 # -# This code was automatically generated across versions from 12.0.1 to 12.4.1. Do not modify it directly. +# This code was automatically generated across versions from 12.0.76 to 12.6.77. Do not modify it directly. 
-from ..cynvJitLink cimport * +from ..cynvjitlink cimport * ############################################################################### diff --git a/cuda_bindings/cuda/bindings/_bindings/nvJitLink_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink.pyx similarity index 93% rename from cuda_bindings/cuda/bindings/_bindings/nvJitLink_linux.pyx rename to cuda_bindings/cuda/bindings/_internal/nvjitlink.pyx index 2fc6ca625..ff7a6ca3a 100644 --- a/cuda_bindings/cuda/bindings/_bindings/nvJitLink_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink.pyx @@ -2,11 +2,11 @@ # # SPDX-License-Identifier: Apache-2.0 # -# This code was automatically generated across versions from 12.0.1 to 12.4.1. Do not modify it directly. +# This code was automatically generated across versions from 12.0.76 to 12.6.77. Do not modify it directly. from libc.stdint cimport intptr_t -from .utils cimport get_nvJitLink_dso_version_suffix +from .utils cimport get_nvjitlink_dso_version_suffix from .utils import FunctionNotFoundError, NotSupportedError @@ -34,7 +34,7 @@ cdef extern from "" nogil: # Wrapper init ############################################################################### -cdef bint __py_nvJitLink_init = False +cdef bint __py_nvjitlink_init = False cdef void* __cuDriverGetVersion = NULL cdef void* __nvJitLinkCreate = NULL @@ -54,20 +54,20 @@ cdef void* __nvJitLinkGetInfoLog = NULL cdef void* load_library(const int driver_ver) except* with gil: cdef void* handle - for suffix in get_nvJitLink_dso_version_suffix(driver_ver): - so_name = "libnvJitLink.so" + (f".{suffix}" if suffix else suffix) + for suffix in get_nvjitlink_dso_version_suffix(driver_ver): + so_name = "libnvjitlink.so" + (f".{suffix}" if suffix else suffix) handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL) if handle != NULL: break else: err_msg = dlerror() - raise RuntimeError(f'Failed to dlopen libnvJitLink ({err_msg.decode()})') + raise RuntimeError(f'Failed to dlopen libnvjitlink 
({err_msg.decode()})') return handle -cdef int _check_or_init_nvJitLink() except -1 nogil: - global __py_nvJitLink_init - if __py_nvJitLink_init: +cdef int _check_or_init_nvjitlink() except -1 nogil: + global __py_nvjitlink_init + if __py_nvjitlink_init: return 0 # Load driver to check version @@ -183,7 +183,7 @@ cdef int _check_or_init_nvJitLink() except -1 nogil: handle = load_library(driver_ver) __nvJitLinkGetInfoLog = dlsym(handle, 'nvJitLinkGetInfoLog') - __py_nvJitLink_init = True + __py_nvjitlink_init = True return 0 @@ -195,7 +195,7 @@ cpdef dict _inspect_function_pointers(): if func_ptrs is not None: return func_ptrs - _check_or_init_nvJitLink() + _check_or_init_nvjitlink() cdef dict data = {} global __nvJitLinkCreate @@ -254,7 +254,7 @@ cpdef _inspect_function_pointer(str name): cdef nvJitLinkResult _nvJitLinkCreate(nvJitLinkHandle* handle, uint32_t numOptions, const char** options) except* nogil: global __nvJitLinkCreate - _check_or_init_nvJitLink() + _check_or_init_nvjitlink() if __nvJitLinkCreate == NULL: with gil: raise FunctionNotFoundError("function nvJitLinkCreate is not found") @@ -264,7 +264,7 @@ cdef nvJitLinkResult _nvJitLinkCreate(nvJitLinkHandle* handle, uint32_t numOptio cdef nvJitLinkResult _nvJitLinkDestroy(nvJitLinkHandle* handle) except* nogil: global __nvJitLinkDestroy - _check_or_init_nvJitLink() + _check_or_init_nvjitlink() if __nvJitLinkDestroy == NULL: with gil: raise FunctionNotFoundError("function nvJitLinkDestroy is not found") @@ -274,7 +274,7 @@ cdef nvJitLinkResult _nvJitLinkDestroy(nvJitLinkHandle* handle) except* nogil: cdef nvJitLinkResult _nvJitLinkAddData(nvJitLinkHandle handle, nvJitLinkInputType inputType, const void* data, size_t size, const char* name) except* nogil: global __nvJitLinkAddData - _check_or_init_nvJitLink() + _check_or_init_nvjitlink() if __nvJitLinkAddData == NULL: with gil: raise FunctionNotFoundError("function nvJitLinkAddData is not found") @@ -284,7 +284,7 @@ cdef nvJitLinkResult 
_nvJitLinkAddData(nvJitLinkHandle handle, nvJitLinkInputTyp cdef nvJitLinkResult _nvJitLinkAddFile(nvJitLinkHandle handle, nvJitLinkInputType inputType, const char* fileName) except* nogil: global __nvJitLinkAddFile - _check_or_init_nvJitLink() + _check_or_init_nvjitlink() if __nvJitLinkAddFile == NULL: with gil: raise FunctionNotFoundError("function nvJitLinkAddFile is not found") @@ -294,7 +294,7 @@ cdef nvJitLinkResult _nvJitLinkAddFile(nvJitLinkHandle handle, nvJitLinkInputTyp cdef nvJitLinkResult _nvJitLinkComplete(nvJitLinkHandle handle) except* nogil: global __nvJitLinkComplete - _check_or_init_nvJitLink() + _check_or_init_nvjitlink() if __nvJitLinkComplete == NULL: with gil: raise FunctionNotFoundError("function nvJitLinkComplete is not found") @@ -304,7 +304,7 @@ cdef nvJitLinkResult _nvJitLinkComplete(nvJitLinkHandle handle) except* nogil: cdef nvJitLinkResult _nvJitLinkGetLinkedCubinSize(nvJitLinkHandle handle, size_t* size) except* nogil: global __nvJitLinkGetLinkedCubinSize - _check_or_init_nvJitLink() + _check_or_init_nvjitlink() if __nvJitLinkGetLinkedCubinSize == NULL: with gil: raise FunctionNotFoundError("function nvJitLinkGetLinkedCubinSize is not found") @@ -314,7 +314,7 @@ cdef nvJitLinkResult _nvJitLinkGetLinkedCubinSize(nvJitLinkHandle handle, size_t cdef nvJitLinkResult _nvJitLinkGetLinkedCubin(nvJitLinkHandle handle, void* cubin) except* nogil: global __nvJitLinkGetLinkedCubin - _check_or_init_nvJitLink() + _check_or_init_nvjitlink() if __nvJitLinkGetLinkedCubin == NULL: with gil: raise FunctionNotFoundError("function nvJitLinkGetLinkedCubin is not found") @@ -324,7 +324,7 @@ cdef nvJitLinkResult _nvJitLinkGetLinkedCubin(nvJitLinkHandle handle, void* cubi cdef nvJitLinkResult _nvJitLinkGetLinkedPtxSize(nvJitLinkHandle handle, size_t* size) except* nogil: global __nvJitLinkGetLinkedPtxSize - _check_or_init_nvJitLink() + _check_or_init_nvjitlink() if __nvJitLinkGetLinkedPtxSize == NULL: with gil: raise FunctionNotFoundError("function 
nvJitLinkGetLinkedPtxSize is not found") @@ -334,7 +334,7 @@ cdef nvJitLinkResult _nvJitLinkGetLinkedPtxSize(nvJitLinkHandle handle, size_t* cdef nvJitLinkResult _nvJitLinkGetLinkedPtx(nvJitLinkHandle handle, char* ptx) except* nogil: global __nvJitLinkGetLinkedPtx - _check_or_init_nvJitLink() + _check_or_init_nvjitlink() if __nvJitLinkGetLinkedPtx == NULL: with gil: raise FunctionNotFoundError("function nvJitLinkGetLinkedPtx is not found") @@ -344,7 +344,7 @@ cdef nvJitLinkResult _nvJitLinkGetLinkedPtx(nvJitLinkHandle handle, char* ptx) e cdef nvJitLinkResult _nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t* size) except* nogil: global __nvJitLinkGetErrorLogSize - _check_or_init_nvJitLink() + _check_or_init_nvjitlink() if __nvJitLinkGetErrorLogSize == NULL: with gil: raise FunctionNotFoundError("function nvJitLinkGetErrorLogSize is not found") @@ -354,7 +354,7 @@ cdef nvJitLinkResult _nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t* s cdef nvJitLinkResult _nvJitLinkGetErrorLog(nvJitLinkHandle handle, char* log) except* nogil: global __nvJitLinkGetErrorLog - _check_or_init_nvJitLink() + _check_or_init_nvjitlink() if __nvJitLinkGetErrorLog == NULL: with gil: raise FunctionNotFoundError("function nvJitLinkGetErrorLog is not found") @@ -364,7 +364,7 @@ cdef nvJitLinkResult _nvJitLinkGetErrorLog(nvJitLinkHandle handle, char* log) ex cdef nvJitLinkResult _nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t* size) except* nogil: global __nvJitLinkGetInfoLogSize - _check_or_init_nvJitLink() + _check_or_init_nvjitlink() if __nvJitLinkGetInfoLogSize == NULL: with gil: raise FunctionNotFoundError("function nvJitLinkGetInfoLogSize is not found") @@ -374,7 +374,7 @@ cdef nvJitLinkResult _nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t* si cdef nvJitLinkResult _nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) except* nogil: global __nvJitLinkGetInfoLog - _check_or_init_nvJitLink() + _check_or_init_nvjitlink() if __nvJitLinkGetInfoLog == NULL: 
with gil: raise FunctionNotFoundError("function nvJitLinkGetInfoLog is not found") diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx new file mode 100644 index 000000000..ff7a6ca3a --- /dev/null +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx @@ -0,0 +1,382 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: Apache-2.0 +# +# This code was automatically generated across versions from 12.0.76 to 12.6.77. Do not modify it directly. + +from libc.stdint cimport intptr_t + +from .utils cimport get_nvjitlink_dso_version_suffix + +from .utils import FunctionNotFoundError, NotSupportedError + + +############################################################################### +# Extern +############################################################################### + +cdef extern from "" nogil: + void* dlopen(const char*, int) + char* dlerror() + void* dlsym(void*, const char*) + int dlclose(void*) + + enum: + RTLD_LAZY + RTLD_NOW + RTLD_GLOBAL + RTLD_LOCAL + + const void* RTLD_DEFAULT 'RTLD_DEFAULT' + + +############################################################################### +# Wrapper init +############################################################################### + +cdef bint __py_nvjitlink_init = False +cdef void* __cuDriverGetVersion = NULL + +cdef void* __nvJitLinkCreate = NULL +cdef void* __nvJitLinkDestroy = NULL +cdef void* __nvJitLinkAddData = NULL +cdef void* __nvJitLinkAddFile = NULL +cdef void* __nvJitLinkComplete = NULL +cdef void* __nvJitLinkGetLinkedCubinSize = NULL +cdef void* __nvJitLinkGetLinkedCubin = NULL +cdef void* __nvJitLinkGetLinkedPtxSize = NULL +cdef void* __nvJitLinkGetLinkedPtx = NULL +cdef void* __nvJitLinkGetErrorLogSize = NULL +cdef void* __nvJitLinkGetErrorLog = NULL +cdef void* __nvJitLinkGetInfoLogSize = NULL +cdef void* __nvJitLinkGetInfoLog = NULL + + +cdef void* 
load_library(const int driver_ver) except* with gil: + cdef void* handle + for suffix in get_nvjitlink_dso_version_suffix(driver_ver): + so_name = "libnvjitlink.so" + (f".{suffix}" if suffix else suffix) + handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL) + if handle != NULL: + break + else: + err_msg = dlerror() + raise RuntimeError(f'Failed to dlopen libnvjitlink ({err_msg.decode()})') + return handle + + +cdef int _check_or_init_nvjitlink() except -1 nogil: + global __py_nvjitlink_init + if __py_nvjitlink_init: + return 0 + + # Load driver to check version + cdef void* handle = NULL + handle = dlopen('libcuda.so.1', RTLD_NOW | RTLD_GLOBAL) + if handle == NULL: + with gil: + err_msg = dlerror() + raise NotSupportedError(f'CUDA driver is not found ({err_msg.decode()})') + global __cuDriverGetVersion + if __cuDriverGetVersion == NULL: + __cuDriverGetVersion = dlsym(handle, "cuDriverGetVersion") + if __cuDriverGetVersion == NULL: + with gil: + raise RuntimeError('something went wrong') + cdef int err, driver_ver + err = (__cuDriverGetVersion)(&driver_ver) + if err != 0: + with gil: + raise RuntimeError('something went wrong') + #dlclose(handle) + handle = NULL + + # Load function + global __nvJitLinkCreate + __nvJitLinkCreate = dlsym(RTLD_DEFAULT, 'nvJitLinkCreate') + if __nvJitLinkCreate == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvJitLinkCreate = dlsym(handle, 'nvJitLinkCreate') + + global __nvJitLinkDestroy + __nvJitLinkDestroy = dlsym(RTLD_DEFAULT, 'nvJitLinkDestroy') + if __nvJitLinkDestroy == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvJitLinkDestroy = dlsym(handle, 'nvJitLinkDestroy') + + global __nvJitLinkAddData + __nvJitLinkAddData = dlsym(RTLD_DEFAULT, 'nvJitLinkAddData') + if __nvJitLinkAddData == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvJitLinkAddData = dlsym(handle, 'nvJitLinkAddData') + + global __nvJitLinkAddFile + __nvJitLinkAddFile = dlsym(RTLD_DEFAULT, 
'nvJitLinkAddFile') + if __nvJitLinkAddFile == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvJitLinkAddFile = dlsym(handle, 'nvJitLinkAddFile') + + global __nvJitLinkComplete + __nvJitLinkComplete = dlsym(RTLD_DEFAULT, 'nvJitLinkComplete') + if __nvJitLinkComplete == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvJitLinkComplete = dlsym(handle, 'nvJitLinkComplete') + + global __nvJitLinkGetLinkedCubinSize + __nvJitLinkGetLinkedCubinSize = dlsym(RTLD_DEFAULT, 'nvJitLinkGetLinkedCubinSize') + if __nvJitLinkGetLinkedCubinSize == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvJitLinkGetLinkedCubinSize = dlsym(handle, 'nvJitLinkGetLinkedCubinSize') + + global __nvJitLinkGetLinkedCubin + __nvJitLinkGetLinkedCubin = dlsym(RTLD_DEFAULT, 'nvJitLinkGetLinkedCubin') + if __nvJitLinkGetLinkedCubin == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvJitLinkGetLinkedCubin = dlsym(handle, 'nvJitLinkGetLinkedCubin') + + global __nvJitLinkGetLinkedPtxSize + __nvJitLinkGetLinkedPtxSize = dlsym(RTLD_DEFAULT, 'nvJitLinkGetLinkedPtxSize') + if __nvJitLinkGetLinkedPtxSize == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvJitLinkGetLinkedPtxSize = dlsym(handle, 'nvJitLinkGetLinkedPtxSize') + + global __nvJitLinkGetLinkedPtx + __nvJitLinkGetLinkedPtx = dlsym(RTLD_DEFAULT, 'nvJitLinkGetLinkedPtx') + if __nvJitLinkGetLinkedPtx == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvJitLinkGetLinkedPtx = dlsym(handle, 'nvJitLinkGetLinkedPtx') + + global __nvJitLinkGetErrorLogSize + __nvJitLinkGetErrorLogSize = dlsym(RTLD_DEFAULT, 'nvJitLinkGetErrorLogSize') + if __nvJitLinkGetErrorLogSize == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvJitLinkGetErrorLogSize = dlsym(handle, 'nvJitLinkGetErrorLogSize') + + global __nvJitLinkGetErrorLog + __nvJitLinkGetErrorLog = dlsym(RTLD_DEFAULT, 'nvJitLinkGetErrorLog') + if __nvJitLinkGetErrorLog == NULL: + if 
handle == NULL: + handle = load_library(driver_ver) + __nvJitLinkGetErrorLog = dlsym(handle, 'nvJitLinkGetErrorLog') + + global __nvJitLinkGetInfoLogSize + __nvJitLinkGetInfoLogSize = dlsym(RTLD_DEFAULT, 'nvJitLinkGetInfoLogSize') + if __nvJitLinkGetInfoLogSize == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvJitLinkGetInfoLogSize = dlsym(handle, 'nvJitLinkGetInfoLogSize') + + global __nvJitLinkGetInfoLog + __nvJitLinkGetInfoLog = dlsym(RTLD_DEFAULT, 'nvJitLinkGetInfoLog') + if __nvJitLinkGetInfoLog == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvJitLinkGetInfoLog = dlsym(handle, 'nvJitLinkGetInfoLog') + + __py_nvjitlink_init = True + return 0 + + +cdef dict func_ptrs = None + + +cpdef dict _inspect_function_pointers(): + global func_ptrs + if func_ptrs is not None: + return func_ptrs + + _check_or_init_nvjitlink() + cdef dict data = {} + + global __nvJitLinkCreate + data["__nvJitLinkCreate"] = __nvJitLinkCreate + + global __nvJitLinkDestroy + data["__nvJitLinkDestroy"] = __nvJitLinkDestroy + + global __nvJitLinkAddData + data["__nvJitLinkAddData"] = __nvJitLinkAddData + + global __nvJitLinkAddFile + data["__nvJitLinkAddFile"] = __nvJitLinkAddFile + + global __nvJitLinkComplete + data["__nvJitLinkComplete"] = __nvJitLinkComplete + + global __nvJitLinkGetLinkedCubinSize + data["__nvJitLinkGetLinkedCubinSize"] = __nvJitLinkGetLinkedCubinSize + + global __nvJitLinkGetLinkedCubin + data["__nvJitLinkGetLinkedCubin"] = __nvJitLinkGetLinkedCubin + + global __nvJitLinkGetLinkedPtxSize + data["__nvJitLinkGetLinkedPtxSize"] = __nvJitLinkGetLinkedPtxSize + + global __nvJitLinkGetLinkedPtx + data["__nvJitLinkGetLinkedPtx"] = __nvJitLinkGetLinkedPtx + + global __nvJitLinkGetErrorLogSize + data["__nvJitLinkGetErrorLogSize"] = __nvJitLinkGetErrorLogSize + + global __nvJitLinkGetErrorLog + data["__nvJitLinkGetErrorLog"] = __nvJitLinkGetErrorLog + + global __nvJitLinkGetInfoLogSize + data["__nvJitLinkGetInfoLogSize"] = 
__nvJitLinkGetInfoLogSize + + global __nvJitLinkGetInfoLog + data["__nvJitLinkGetInfoLog"] = __nvJitLinkGetInfoLog + + func_ptrs = data + return data + + +cpdef _inspect_function_pointer(str name): + global func_ptrs + if func_ptrs is None: + func_ptrs = _inspect_function_pointers() + return func_ptrs[name] + + +############################################################################### +# Wrapper functions +############################################################################### + +cdef nvJitLinkResult _nvJitLinkCreate(nvJitLinkHandle* handle, uint32_t numOptions, const char** options) except* nogil: + global __nvJitLinkCreate + _check_or_init_nvjitlink() + if __nvJitLinkCreate == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkCreate is not found") + return (__nvJitLinkCreate)( + handle, numOptions, options) + + +cdef nvJitLinkResult _nvJitLinkDestroy(nvJitLinkHandle* handle) except* nogil: + global __nvJitLinkDestroy + _check_or_init_nvjitlink() + if __nvJitLinkDestroy == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkDestroy is not found") + return (__nvJitLinkDestroy)( + handle) + + +cdef nvJitLinkResult _nvJitLinkAddData(nvJitLinkHandle handle, nvJitLinkInputType inputType, const void* data, size_t size, const char* name) except* nogil: + global __nvJitLinkAddData + _check_or_init_nvjitlink() + if __nvJitLinkAddData == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkAddData is not found") + return (__nvJitLinkAddData)( + handle, inputType, data, size, name) + + +cdef nvJitLinkResult _nvJitLinkAddFile(nvJitLinkHandle handle, nvJitLinkInputType inputType, const char* fileName) except* nogil: + global __nvJitLinkAddFile + _check_or_init_nvjitlink() + if __nvJitLinkAddFile == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkAddFile is not found") + return (__nvJitLinkAddFile)( + handle, inputType, fileName) + + +cdef nvJitLinkResult _nvJitLinkComplete(nvJitLinkHandle handle) 
except* nogil: + global __nvJitLinkComplete + _check_or_init_nvjitlink() + if __nvJitLinkComplete == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkComplete is not found") + return (__nvJitLinkComplete)( + handle) + + +cdef nvJitLinkResult _nvJitLinkGetLinkedCubinSize(nvJitLinkHandle handle, size_t* size) except* nogil: + global __nvJitLinkGetLinkedCubinSize + _check_or_init_nvjitlink() + if __nvJitLinkGetLinkedCubinSize == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkGetLinkedCubinSize is not found") + return (__nvJitLinkGetLinkedCubinSize)( + handle, size) + + +cdef nvJitLinkResult _nvJitLinkGetLinkedCubin(nvJitLinkHandle handle, void* cubin) except* nogil: + global __nvJitLinkGetLinkedCubin + _check_or_init_nvjitlink() + if __nvJitLinkGetLinkedCubin == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkGetLinkedCubin is not found") + return (__nvJitLinkGetLinkedCubin)( + handle, cubin) + + +cdef nvJitLinkResult _nvJitLinkGetLinkedPtxSize(nvJitLinkHandle handle, size_t* size) except* nogil: + global __nvJitLinkGetLinkedPtxSize + _check_or_init_nvjitlink() + if __nvJitLinkGetLinkedPtxSize == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkGetLinkedPtxSize is not found") + return (__nvJitLinkGetLinkedPtxSize)( + handle, size) + + +cdef nvJitLinkResult _nvJitLinkGetLinkedPtx(nvJitLinkHandle handle, char* ptx) except* nogil: + global __nvJitLinkGetLinkedPtx + _check_or_init_nvjitlink() + if __nvJitLinkGetLinkedPtx == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkGetLinkedPtx is not found") + return (__nvJitLinkGetLinkedPtx)( + handle, ptx) + + +cdef nvJitLinkResult _nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t* size) except* nogil: + global __nvJitLinkGetErrorLogSize + _check_or_init_nvjitlink() + if __nvJitLinkGetErrorLogSize == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkGetErrorLogSize is not found") + return 
(__nvJitLinkGetErrorLogSize)( + handle, size) + + +cdef nvJitLinkResult _nvJitLinkGetErrorLog(nvJitLinkHandle handle, char* log) except* nogil: + global __nvJitLinkGetErrorLog + _check_or_init_nvjitlink() + if __nvJitLinkGetErrorLog == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkGetErrorLog is not found") + return (__nvJitLinkGetErrorLog)( + handle, log) + + +cdef nvJitLinkResult _nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t* size) except* nogil: + global __nvJitLinkGetInfoLogSize + _check_or_init_nvjitlink() + if __nvJitLinkGetInfoLogSize == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkGetInfoLogSize is not found") + return (__nvJitLinkGetInfoLogSize)( + handle, size) + + +cdef nvJitLinkResult _nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) except* nogil: + global __nvJitLinkGetInfoLog + _check_or_init_nvjitlink() + if __nvJitLinkGetInfoLog == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkGetInfoLog is not found") + return (__nvJitLinkGetInfoLog)( + handle, log) diff --git a/cuda_bindings/cuda/bindings/_bindings/nvJitLink_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx similarity index 93% rename from cuda_bindings/cuda/bindings/_bindings/nvJitLink_windows.pyx rename to cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx index 8856b59ca..43852441e 100644 --- a/cuda_bindings/cuda/bindings/_bindings/nvJitLink_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx @@ -2,11 +2,11 @@ # # SPDX-License-Identifier: Apache-2.0 # -# This code was automatically generated across versions from 12.0.1 to 12.4.1. Do not modify it directly. +# This code was automatically generated across versions from 12.0.76 to 12.6.77. Do not modify it directly. 
from libc.stdint cimport intptr_t -from .utils cimport get_nvJitLink_dso_version_suffix +from .utils cimport get_nvjitlink_dso_version_suffix import os import site @@ -23,7 +23,7 @@ from .utils import FunctionNotFoundError, NotSupportedError LOAD_LIBRARY_SEARCH_SYSTEM32 = 0x00000800 LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000 LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100 -cdef bint __py_nvJitLink_init = False +cdef bint __py_nvjitlink_init = False cdef void* __cuDriverGetVersion = NULL cdef void* __nvJitLinkCreate = NULL @@ -48,10 +48,10 @@ cdef inline list get_site_packages(): cdef load_library(const int driver_ver): handle = 0 - for suffix in get_nvJitLink_dso_version_suffix(driver_ver): + for suffix in get_nvjitlink_dso_version_suffix(driver_ver): if len(suffix) == 0: continue - dll_name = f"nvJitLink64_{suffix}.dll" + dll_name = f"nvjitlink64_{suffix}.dll" # First check if the DLL has been loaded by 3rd parties try: @@ -63,7 +63,7 @@ cdef load_library(const int driver_ver): # Next, check if DLLs are installed via pip for sp in get_site_packages(): - mod_path = os.path.join(sp, "nvidia", "nvJitLink", "bin") + mod_path = os.path.join(sp, "nvidia", "nvjitlink", "bin") if not os.path.isdir(mod_path): continue os.add_dll_directory(mod_path) @@ -85,15 +85,15 @@ cdef load_library(const int driver_ver): else: break else: - raise RuntimeError('Failed to load nvJitLink') + raise RuntimeError('Failed to load nvjitlink') assert handle != 0 return handle -cdef int _check_or_init_nvJitLink() except -1 nogil: - global __py_nvJitLink_init - if __py_nvJitLink_init: +cdef int _check_or_init_nvjitlink() except -1 nogil: + global __py_nvjitlink_init + if __py_nvjitlink_init: return 0 cdef int err, driver_ver @@ -194,7 +194,7 @@ cdef int _check_or_init_nvJitLink() except -1 nogil: except: pass - __py_nvJitLink_init = True + __py_nvjitlink_init = True return 0 @@ -206,7 +206,7 @@ cpdef dict _inspect_function_pointers(): if func_ptrs is not None: return func_ptrs - 
_check_or_init_nvJitLink() + _check_or_init_nvjitlink() cdef dict data = {} global __nvJitLinkCreate @@ -265,7 +265,7 @@ cpdef _inspect_function_pointer(str name): cdef nvJitLinkResult _nvJitLinkCreate(nvJitLinkHandle* handle, uint32_t numOptions, const char** options) except* nogil: global __nvJitLinkCreate - _check_or_init_nvJitLink() + _check_or_init_nvjitlink() if __nvJitLinkCreate == NULL: with gil: raise FunctionNotFoundError("function nvJitLinkCreate is not found") @@ -275,7 +275,7 @@ cdef nvJitLinkResult _nvJitLinkCreate(nvJitLinkHandle* handle, uint32_t numOptio cdef nvJitLinkResult _nvJitLinkDestroy(nvJitLinkHandle* handle) except* nogil: global __nvJitLinkDestroy - _check_or_init_nvJitLink() + _check_or_init_nvjitlink() if __nvJitLinkDestroy == NULL: with gil: raise FunctionNotFoundError("function nvJitLinkDestroy is not found") @@ -285,7 +285,7 @@ cdef nvJitLinkResult _nvJitLinkDestroy(nvJitLinkHandle* handle) except* nogil: cdef nvJitLinkResult _nvJitLinkAddData(nvJitLinkHandle handle, nvJitLinkInputType inputType, const void* data, size_t size, const char* name) except* nogil: global __nvJitLinkAddData - _check_or_init_nvJitLink() + _check_or_init_nvjitlink() if __nvJitLinkAddData == NULL: with gil: raise FunctionNotFoundError("function nvJitLinkAddData is not found") @@ -295,7 +295,7 @@ cdef nvJitLinkResult _nvJitLinkAddData(nvJitLinkHandle handle, nvJitLinkInputTyp cdef nvJitLinkResult _nvJitLinkAddFile(nvJitLinkHandle handle, nvJitLinkInputType inputType, const char* fileName) except* nogil: global __nvJitLinkAddFile - _check_or_init_nvJitLink() + _check_or_init_nvjitlink() if __nvJitLinkAddFile == NULL: with gil: raise FunctionNotFoundError("function nvJitLinkAddFile is not found") @@ -305,7 +305,7 @@ cdef nvJitLinkResult _nvJitLinkAddFile(nvJitLinkHandle handle, nvJitLinkInputTyp cdef nvJitLinkResult _nvJitLinkComplete(nvJitLinkHandle handle) except* nogil: global __nvJitLinkComplete - _check_or_init_nvJitLink() + _check_or_init_nvjitlink() if 
__nvJitLinkComplete == NULL: with gil: raise FunctionNotFoundError("function nvJitLinkComplete is not found") @@ -315,7 +315,7 @@ cdef nvJitLinkResult _nvJitLinkComplete(nvJitLinkHandle handle) except* nogil: cdef nvJitLinkResult _nvJitLinkGetLinkedCubinSize(nvJitLinkHandle handle, size_t* size) except* nogil: global __nvJitLinkGetLinkedCubinSize - _check_or_init_nvJitLink() + _check_or_init_nvjitlink() if __nvJitLinkGetLinkedCubinSize == NULL: with gil: raise FunctionNotFoundError("function nvJitLinkGetLinkedCubinSize is not found") @@ -325,7 +325,7 @@ cdef nvJitLinkResult _nvJitLinkGetLinkedCubinSize(nvJitLinkHandle handle, size_t cdef nvJitLinkResult _nvJitLinkGetLinkedCubin(nvJitLinkHandle handle, void* cubin) except* nogil: global __nvJitLinkGetLinkedCubin - _check_or_init_nvJitLink() + _check_or_init_nvjitlink() if __nvJitLinkGetLinkedCubin == NULL: with gil: raise FunctionNotFoundError("function nvJitLinkGetLinkedCubin is not found") @@ -335,7 +335,7 @@ cdef nvJitLinkResult _nvJitLinkGetLinkedCubin(nvJitLinkHandle handle, void* cubi cdef nvJitLinkResult _nvJitLinkGetLinkedPtxSize(nvJitLinkHandle handle, size_t* size) except* nogil: global __nvJitLinkGetLinkedPtxSize - _check_or_init_nvJitLink() + _check_or_init_nvjitlink() if __nvJitLinkGetLinkedPtxSize == NULL: with gil: raise FunctionNotFoundError("function nvJitLinkGetLinkedPtxSize is not found") @@ -345,7 +345,7 @@ cdef nvJitLinkResult _nvJitLinkGetLinkedPtxSize(nvJitLinkHandle handle, size_t* cdef nvJitLinkResult _nvJitLinkGetLinkedPtx(nvJitLinkHandle handle, char* ptx) except* nogil: global __nvJitLinkGetLinkedPtx - _check_or_init_nvJitLink() + _check_or_init_nvjitlink() if __nvJitLinkGetLinkedPtx == NULL: with gil: raise FunctionNotFoundError("function nvJitLinkGetLinkedPtx is not found") @@ -355,7 +355,7 @@ cdef nvJitLinkResult _nvJitLinkGetLinkedPtx(nvJitLinkHandle handle, char* ptx) e cdef nvJitLinkResult _nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t* size) except* nogil: global 
__nvJitLinkGetErrorLogSize - _check_or_init_nvJitLink() + _check_or_init_nvjitlink() if __nvJitLinkGetErrorLogSize == NULL: with gil: raise FunctionNotFoundError("function nvJitLinkGetErrorLogSize is not found") @@ -365,7 +365,7 @@ cdef nvJitLinkResult _nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t* s cdef nvJitLinkResult _nvJitLinkGetErrorLog(nvJitLinkHandle handle, char* log) except* nogil: global __nvJitLinkGetErrorLog - _check_or_init_nvJitLink() + _check_or_init_nvjitlink() if __nvJitLinkGetErrorLog == NULL: with gil: raise FunctionNotFoundError("function nvJitLinkGetErrorLog is not found") @@ -375,7 +375,7 @@ cdef nvJitLinkResult _nvJitLinkGetErrorLog(nvJitLinkHandle handle, char* log) ex cdef nvJitLinkResult _nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t* size) except* nogil: global __nvJitLinkGetInfoLogSize - _check_or_init_nvJitLink() + _check_or_init_nvjitlink() if __nvJitLinkGetInfoLogSize == NULL: with gil: raise FunctionNotFoundError("function nvJitLinkGetInfoLogSize is not found") @@ -385,7 +385,7 @@ cdef nvJitLinkResult _nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t* si cdef nvJitLinkResult _nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) except* nogil: global __nvJitLinkGetInfoLog - _check_or_init_nvJitLink() + _check_or_init_nvjitlink() if __nvJitLinkGetInfoLog == NULL: with gil: raise FunctionNotFoundError("function nvJitLinkGetInfoLog is not found") diff --git a/cuda_bindings/cuda/bindings/_internal/utils.pxd b/cuda_bindings/cuda/bindings/_internal/utils.pxd new file mode 100644 index 000000000..225ab3648 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_internal/utils.pxd @@ -0,0 +1,172 @@ +from libc.stdint cimport int32_t, int64_t, intptr_t +from libcpp.vector cimport vector +from libcpp cimport bool as cppbool +from libcpp cimport nullptr_t, nullptr +from libcpp.memory cimport unique_ptr + + +cdef extern from * nogil: + """ + template + class nullable_unique_ptr { + public: + nullable_unique_ptr() noexcept = 
default; + + nullable_unique_ptr(std::nullptr_t) noexcept = delete; + + explicit nullable_unique_ptr(T* data, bool own_data): + own_data_(own_data) + { + if (own_data) + manager_.reset(data); + else + raw_data_ = data; + } + + nullable_unique_ptr(const nullable_unique_ptr&) = delete; + + nullable_unique_ptr& operator=(const nullable_unique_ptr&) = delete; + + nullable_unique_ptr(nullable_unique_ptr&& other) noexcept + { + own_data_ = other.own_data_; + other.own_data_ = false; // ownership is transferred + if (own_data_) + { + manager_ = std::move(other.manager_); + raw_data_ = nullptr; // just in case + } + else + { + manager_.reset(nullptr); // just in case + raw_data_ = other.raw_data_; + } + } + + nullable_unique_ptr& operator=(nullable_unique_ptr&& other) noexcept + { + own_data_ = other.own_data_; + other.own_data_ = false; // ownership is transferred + if (own_data_) + { + manager_ = std::move(other.manager_); + raw_data_ = nullptr; // just in case + } + else + { + manager_.reset(nullptr); // just in case + raw_data_ = other.raw_data_; + } + return *this; + } + + ~nullable_unique_ptr() = default; + + void reset(T* data, bool own_data) + { + own_data_ = own_data; + if (own_data_) + { + manager_.reset(data); + raw_data_ = nullptr; + } + else + { + manager_.reset(nullptr); + raw_data_ = data; + } + } + + void swap(nullable_unique_ptr& other) noexcept + { + std::swap(manager_, other.manager_); + std::swap(raw_data_, other.raw_data_); + std::swap(own_data_, other.own_data_); + } + + /* + * Get the pointer to the underlying object (this is different from data()!). + */ + T* get() const noexcept + { + if (own_data_) + return manager_.get(); + else + return raw_data_; + } + + /* + * Get the pointer to the underlying buffer (this is different from get()!). 
+ */ + void* data() noexcept + { + if (own_data_) + return manager_.get()->data(); + else + return raw_data_; + } + + T& operator*() + { + if (own_data_) + return *manager_; + else + return *raw_data_; + } + + private: + std::unique_ptr manager_{}; + T* raw_data_{nullptr}; + bool own_data_{false}; + }; + """ + # xref: cython/Cython/Includes/libcpp/memory.pxd + cdef cppclass nullable_unique_ptr[T]: + nullable_unique_ptr() + nullable_unique_ptr(T*, cppbool) + nullable_unique_ptr(nullable_unique_ptr[T]&) + + # Modifiers + void reset(T*, cppbool) + void swap(nullable_unique_ptr&) + + # Observers + T* get() + T& operator*() + void* data() + + +cdef extern from "" nogil: + ctypedef struct cuComplex: + pass + ctypedef struct cuDoubleComplex: + pass + + +ctypedef fused ResT: + int + int32_t + int64_t + + +ctypedef fused PtrT: + float + double + cuComplex + cuDoubleComplex + void + + +cdef cppclass nested_resource[T]: + nullable_unique_ptr[ vector[intptr_t] ] ptrs + nullable_unique_ptr[ vector[vector[T]] ] nested_resource_ptr + +cdef nullable_unique_ptr[ vector[ResT] ] get_resource_ptr_(object obj, ResT* __unused) +cdef int get_resource_ptr(nullable_unique_ptr[vector[ResT]] &in_out_ptr, object obj, ResT* __unused) except 0 +cdef nullable_unique_ptr[ vector[PtrT*] ] get_resource_ptrs(object obj, PtrT* __unused) +cdef nested_resource[ResT] get_nested_resource_ptr(object obj, ResT* __unused) + +cdef bint is_nested_sequence(data) +cdef void* get_buffer_pointer(buf, Py_ssize_t size, readonly=*) except* + +cdef tuple get_nvjitlink_dso_version_suffix(int driver_ver) diff --git a/cuda_bindings/cuda/bindings/_internal/utils.pyx b/cuda_bindings/cuda/bindings/_internal/utils.pyx new file mode 100644 index 000000000..b575ddc03 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_internal/utils.pyx @@ -0,0 +1,139 @@ +cimport cpython +from libc.stdint cimport intptr_t +from libcpp.utility cimport move +from cython.operator cimport dereference as deref + + +cdef bint is_nested_sequence(data): 
+ if not cpython.PySequence_Check(data): + return False + else: + for i in data: + if not cpython.PySequence_Check(i): + return False + else: + return True + + +cdef void* get_buffer_pointer(buf, Py_ssize_t size, readonly=True) except*: + """The caller must ensure ``buf`` is alive when the returned pointer is in use.""" + cdef void* bufPtr + cdef int flags = cpython.PyBUF_ANY_CONTIGUOUS + if not readonly: + flags |= cpython.PyBUF_WRITABLE + cdef int status = -1 + cdef cpython.Py_buffer view + + if isinstance(buf, int): + bufPtr = buf + else: # try buffer protocol + try: + status = cpython.PyObject_GetBuffer(buf, &view, flags) + assert view.len == size + assert view.ndim == 1 + except Exception as e: + adj = "writable " if not readonly else "" + raise ValueError( + "buf must be either a Python int representing the pointer " + f"address to a valid buffer, or a 1D contiguous {adj}" + "buffer, of size bytes") from e + else: + bufPtr = view.buf + finally: + if status == 0: + cpython.PyBuffer_Release(&view) + + return bufPtr + + +# Cython can't infer the overload by return type alone, so we need a dummy +# input argument to help it +cdef nullable_unique_ptr[ vector[ResT] ] get_resource_ptr_(object obj, ResT* __unused): + cdef nullable_unique_ptr[ vector[ResT] ] ptr + cdef vector[ResT]* vec + if cpython.PySequence_Check(obj): + vec = new vector[ResT](len(obj)) + for i in range(len(obj)): + deref(vec)[i] = obj[i] + ptr.reset(vec, True) + else: + ptr.reset(obj, False) + return move(ptr) + +cdef int get_resource_ptr(nullable_unique_ptr[vector[ResT]] &in_out_ptr, object obj, ResT* __unused) except 0: + cdef vector[ResT]* vec + if cpython.PySequence_Check(obj): + vec = new vector[ResT](len(obj)) + # set the ownership immediately to avoid + # leaking the `vec` memory in case of exception + # (e.g. 
ResT type range overflow) + # when populating the memory in the loop + in_out_ptr.reset(vec, True) + for i in range(len(obj)): + deref(vec)[i] = obj[i] + else: + in_out_ptr.reset(obj, False) + return 1 + + +cdef nullable_unique_ptr[ vector[PtrT*] ] get_resource_ptrs(object obj, PtrT* __unused): + cdef nullable_unique_ptr[ vector[PtrT*] ] ptr + cdef vector[PtrT*]* vec + if cpython.PySequence_Check(obj): + vec = new vector[PtrT*](len(obj)) + for i in range(len(obj)): + deref(vec)[i] = (obj[i]) + ptr.reset(vec, True) + else: + ptr.reset(obj, False) + return move(ptr) + + +cdef nested_resource[ResT] get_nested_resource_ptr(object obj, ResT* __unused): + cdef nested_resource[ResT] res + cdef nullable_unique_ptr[ vector[intptr_t] ] nested_ptr + cdef nullable_unique_ptr[ vector[vector[ResT]] ] nested_res_ptr + cdef vector[intptr_t]* nested_vec = NULL + cdef vector[vector[ResT]]* nested_res_vec = NULL + cdef size_t i = 0, length = 0 + cdef intptr_t addr + + if is_nested_sequence(obj): + length = len(obj) + nested_res_vec = new vector[vector[ResT]](length) + nested_vec = new vector[intptr_t](length) + for i, obj_i in enumerate(obj): + deref(nested_res_vec)[i] = obj_i + deref(nested_vec)[i] = (deref(nested_res_vec)[i].data()) + nested_res_ptr.reset(nested_res_vec, True) + nested_ptr.reset(nested_vec, True) + elif cpython.PySequence_Check(obj): + length = len(obj) + nested_vec = new vector[intptr_t](length) + for i, addr in enumerate(obj): + deref(nested_vec)[i] = addr + nested_res_ptr.reset(NULL, False) + nested_ptr.reset(nested_vec, True) + else: + # obj is an int (ResT**) + nested_res_ptr.reset(NULL, False) + nested_ptr.reset(obj, False) + + res.ptrs = move(nested_ptr) + res.nested_resource_ptr = move(nested_res_ptr) + return move(res) + + +class FunctionNotFoundError(RuntimeError): pass + +class NotSupportedError(RuntimeError): pass + + +cdef tuple get_nvjitlink_dso_version_suffix(int driver_ver): + # applicable to both cuBLAS and cuBLASLt + if 11000 <= driver_ver < 
12000: + return ('11', '') + elif 12000 <= driver_ver < 13000: + return ('12', '11', '') + else: + raise NotSupportedError('only CUDA 11/12 driver is supported') \ No newline at end of file diff --git a/cuda_bindings/cynvJitLink.pxd b/cuda_bindings/cuda/bindings/cynvjitlink.pxd similarity index 60% rename from cuda_bindings/cynvJitLink.pxd rename to cuda_bindings/cuda/bindings/cynvjitlink.pxd index ed440c0b3..2913111f0 100644 --- a/cuda_bindings/cynvJitLink.pxd +++ b/cuda_bindings/cuda/bindings/cynvjitlink.pxd @@ -2,10 +2,10 @@ # # SPDX-License-Identifier: Apache-2.0 # -# This code was automatically generated across versions from 12.0.1 to 12.4.1. Do not modify it directly. +# This code was automatically generated across versions from 12.0.76 to 12.6.77. Do not modify it directly. -from libc.stdint cimport int64_t +from libc.stdint cimport intptr_t, uint32_t ############################################################################### @@ -13,7 +13,28 @@ from libc.stdint cimport int64_t ############################################################################### # enums +ctypedef enum nvJitLinkResult "nvJitLinkResult": + NVJITLINK_SUCCESS "NVJITLINK_SUCCESS" = 0 + NVJITLINK_ERROR_UNRECOGNIZED_OPTION "NVJITLINK_ERROR_UNRECOGNIZED_OPTION" + NVJITLINK_ERROR_MISSING_ARCH "NVJITLINK_ERROR_MISSING_ARCH" + NVJITLINK_ERROR_INVALID_INPUT "NVJITLINK_ERROR_INVALID_INPUT" + NVJITLINK_ERROR_PTX_COMPILE "NVJITLINK_ERROR_PTX_COMPILE" + NVJITLINK_ERROR_NVVM_COMPILE "NVJITLINK_ERROR_NVVM_COMPILE" + NVJITLINK_ERROR_INTERNAL "NVJITLINK_ERROR_INTERNAL" + NVJITLINK_ERROR_THREADPOOL "NVJITLINK_ERROR_THREADPOOL" + NVJITLINK_ERROR_UNRECOGNIZED_INPUT "NVJITLINK_ERROR_UNRECOGNIZED_INPUT" + NVJITLINK_ERROR_FINALIZE "NVJITLINK_ERROR_FINALIZE" +ctypedef enum nvJitLinkInputType "nvJitLinkInputType": + NVJITLINK_INPUT_NONE "NVJITLINK_INPUT_NONE" = 0 + NVJITLINK_INPUT_CUBIN "NVJITLINK_INPUT_CUBIN" = 1 + NVJITLINK_INPUT_PTX "NVJITLINK_INPUT_PTX" + NVJITLINK_INPUT_LTOIR "NVJITLINK_INPUT_LTOIR" 
+ NVJITLINK_INPUT_FATBIN "NVJITLINK_INPUT_FATBIN" + NVJITLINK_INPUT_OBJECT "NVJITLINK_INPUT_OBJECT" + NVJITLINK_INPUT_LIBRARY "NVJITLINK_INPUT_LIBRARY" + NVJITLINK_INPUT_INDEX "NVJITLINK_INPUT_INDEX" + NVJITLINK_INPUT_ANY "NVJITLINK_INPUT_ANY" = 10 # types @@ -26,7 +47,7 @@ cdef extern from *: ctypedef void* cudaStream_t 'cudaStream_t' - +ctypedef void* nvJitLinkHandle 'nvJitLinkHandle' ############################################################################### @@ -45,4 +66,4 @@ cdef nvJitLinkResult nvJitLinkGetLinkedPtx(nvJitLinkHandle handle, char* ptx) ex cdef nvJitLinkResult nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t* size) except* nogil cdef nvJitLinkResult nvJitLinkGetErrorLog(nvJitLinkHandle handle, char* log) except* nogil cdef nvJitLinkResult nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t* size) except* nogil -cdef nvJitLinkResult nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) except* nogil +cdef nvJitLinkResult nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) except* nogil \ No newline at end of file diff --git a/cuda_bindings/cynvJitLink.pyx b/cuda_bindings/cuda/bindings/cynvjitlink.pyx similarity index 66% rename from cuda_bindings/cynvJitLink.pyx rename to cuda_bindings/cuda/bindings/cynvjitlink.pyx index 65d3f9840..a6703cc0f 100644 --- a/cuda_bindings/cynvJitLink.pyx +++ b/cuda_bindings/cuda/bindings/cynvjitlink.pyx @@ -2,9 +2,9 @@ # # SPDX-License-Identifier: Apache-2.0 # -# This code was automatically generated across versions from 12.0.1 to 12.4.1. Do not modify it directly. +# This code was automatically generated across versions from 12.0.76 to 12.6.77. Do not modify it directly. 
-from ._internal cimport nvJitLink as _nvJitLink +from ._internal cimport nvjitlink as _nvjitlink ############################################################################### @@ -12,52 +12,52 @@ from ._internal cimport nvJitLink as _nvJitLink ############################################################################### cdef nvJitLinkResult nvJitLinkCreate(nvJitLinkHandle* handle, uint32_t numOptions, const char** options) except* nogil: - return _nvJitLink._nvJitLinkCreate(handle, numOptions, options) + return _nvjitlink._nvJitLinkCreate(handle, numOptions, options) cdef nvJitLinkResult nvJitLinkDestroy(nvJitLinkHandle* handle) except* nogil: - return _nvJitLink._nvJitLinkDestroy(handle) + return _nvjitlink._nvJitLinkDestroy(handle) cdef nvJitLinkResult nvJitLinkAddData(nvJitLinkHandle handle, nvJitLinkInputType inputType, const void* data, size_t size, const char* name) except* nogil: - return _nvJitLink._nvJitLinkAddData(handle, inputType, data, size, name) + return _nvjitlink._nvJitLinkAddData(handle, inputType, data, size, name) cdef nvJitLinkResult nvJitLinkAddFile(nvJitLinkHandle handle, nvJitLinkInputType inputType, const char* fileName) except* nogil: - return _nvJitLink._nvJitLinkAddFile(handle, inputType, fileName) + return _nvjitlink._nvJitLinkAddFile(handle, inputType, fileName) cdef nvJitLinkResult nvJitLinkComplete(nvJitLinkHandle handle) except* nogil: - return _nvJitLink._nvJitLinkComplete(handle) + return _nvjitlink._nvJitLinkComplete(handle) cdef nvJitLinkResult nvJitLinkGetLinkedCubinSize(nvJitLinkHandle handle, size_t* size) except* nogil: - return _nvJitLink._nvJitLinkGetLinkedCubinSize(handle, size) + return _nvjitlink._nvJitLinkGetLinkedCubinSize(handle, size) cdef nvJitLinkResult nvJitLinkGetLinkedCubin(nvJitLinkHandle handle, void* cubin) except* nogil: - return _nvJitLink._nvJitLinkGetLinkedCubin(handle, cubin) + return _nvjitlink._nvJitLinkGetLinkedCubin(handle, cubin) cdef nvJitLinkResult nvJitLinkGetLinkedPtxSize(nvJitLinkHandle 
handle, size_t* size) except* nogil: - return _nvJitLink._nvJitLinkGetLinkedPtxSize(handle, size) + return _nvjitlink._nvJitLinkGetLinkedPtxSize(handle, size) cdef nvJitLinkResult nvJitLinkGetLinkedPtx(nvJitLinkHandle handle, char* ptx) except* nogil: - return _nvJitLink._nvJitLinkGetLinkedPtx(handle, ptx) + return _nvjitlink._nvJitLinkGetLinkedPtx(handle, ptx) cdef nvJitLinkResult nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t* size) except* nogil: - return _nvJitLink._nvJitLinkGetErrorLogSize(handle, size) + return _nvjitlink._nvJitLinkGetErrorLogSize(handle, size) cdef nvJitLinkResult nvJitLinkGetErrorLog(nvJitLinkHandle handle, char* log) except* nogil: - return _nvJitLink._nvJitLinkGetErrorLog(handle, log) + return _nvjitlink._nvJitLinkGetErrorLog(handle, log) cdef nvJitLinkResult nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t* size) except* nogil: - return _nvJitLink._nvJitLinkGetInfoLogSize(handle, size) + return _nvjitlink._nvJitLinkGetInfoLogSize(handle, size) cdef nvJitLinkResult nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) except* nogil: - return _nvJitLink._nvJitLinkGetInfoLog(handle, log) + return _nvjitlink._nvJitLinkGetInfoLog(handle, log) \ No newline at end of file diff --git a/cuda_bindings/cuda/bindings/nvjitlink.pxd b/cuda_bindings/cuda/bindings/nvjitlink.pxd new file mode 100644 index 000000000..de4d46170 --- /dev/null +++ b/cuda_bindings/cuda/bindings/nvjitlink.pxd @@ -0,0 +1,43 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: Apache-2.0 +# +# This code was automatically generated across versions from 12.0.76 to 12.6.77. Do not modify it directly. 
+ +from libc.stdint cimport intptr_t, uint32_t + +from .cynvjitlink cimport * + + +############################################################################### +# Types +############################################################################### + +ctypedef nvJitLinkHandle Handle + + +############################################################################### +# Enum +############################################################################### + +ctypedef nvJitLinkResult _NvJitLinkResult +ctypedef nvJitLinkInputType _NvJitLinkInputType + + +############################################################################### +# Functions +############################################################################### + +cpdef create(intptr_t handle, uint32_t num_options, intptr_t options) +cpdef destroy(intptr_t handle) +cpdef add_data(intptr_t handle, int input_type, intptr_t data, size_t size, intptr_t name) +cpdef add_file(intptr_t handle, int input_type, intptr_t file_name) +cpdef complete(intptr_t handle) +cpdef get_linked_cubin_size(intptr_t handle, intptr_t size) +cpdef get_linked_cubin(intptr_t handle, intptr_t cubin) +cpdef get_linked_ptx_size(intptr_t handle, intptr_t size) +cpdef get_linked_ptx(intptr_t handle, intptr_t ptx) +cpdef get_error_log_size(intptr_t handle, intptr_t size) +cpdef get_error_log(intptr_t handle, intptr_t log) +cpdef get_info_log_size(intptr_t handle, intptr_t size) +cpdef get_info_log(intptr_t handle, intptr_t log) \ No newline at end of file diff --git a/cuda_bindings/cuda/bindings/nvjitlink.pyx b/cuda_bindings/cuda/bindings/nvjitlink.pyx new file mode 100644 index 000000000..8c1a89976 --- /dev/null +++ b/cuda_bindings/cuda/bindings/nvjitlink.pyx @@ -0,0 +1,153 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: Apache-2.0 +# +# This code was automatically generated across versions from 12.0.76 to 12.6.77. Do not modify it directly. 
+ +cimport cython # NOQA + +from enum import IntEnum as _IntEnum + + +############################################################################### +# Enum +############################################################################### + +class NvJitLinkResult(_IntEnum): + """See `nvJitLinkResult`.""" + SUCCESS = NVJITLINK_SUCCESS + ERROR_UNRECOGNIZED_OPTION = NVJITLINK_ERROR_UNRECOGNIZED_OPTION + ERROR_MISSING_ARCH = NVJITLINK_ERROR_MISSING_ARCH + ERROR_INVALID_INPUT = NVJITLINK_ERROR_INVALID_INPUT + ERROR_PTX_COMPILE = NVJITLINK_ERROR_PTX_COMPILE + ERROR_NVVM_COMPILE = NVJITLINK_ERROR_NVVM_COMPILE + ERROR_INTERNAL = NVJITLINK_ERROR_INTERNAL + ERROR_THREADPOOL = NVJITLINK_ERROR_THREADPOOL + ERROR_UNRECOGNIZED_INPUT = NVJITLINK_ERROR_UNRECOGNIZED_INPUT + ERROR_FINALIZE = NVJITLINK_ERROR_FINALIZE + +class NvJitLinkInputType(_IntEnum): + """See `nvJitLinkInputType`.""" + INPUT_NONE = NVJITLINK_INPUT_NONE + INPUT_CUBIN = NVJITLINK_INPUT_CUBIN + INPUT_PTX = NVJITLINK_INPUT_PTX + INPUT_LTOIR = NVJITLINK_INPUT_LTOIR + INPUT_FATBIN = NVJITLINK_INPUT_FATBIN + INPUT_OBJECT = NVJITLINK_INPUT_OBJECT + INPUT_LIBRARY = NVJITLINK_INPUT_LIBRARY + INPUT_INDEX = NVJITLINK_INPUT_INDEX + INPUT_ANY = NVJITLINK_INPUT_ANY + + +############################################################################### +# Error handling +############################################################################### + +cdef dict STATUS={ + NVJITLINK_SUCCESS : 'NVJITLINK_SUCCESS', + NVJITLINK_ERROR_UNRECOGNIZED_OPTION : 'NVJITLINK_ERROR_UNRECOGNIZED_OPTION', + NVJITLINK_ERROR_MISSING_ARCH : 'NVJITLINK_ERROR_MISSING_ARCH', + NVJITLINK_ERROR_INVALID_INPUT : 'NVJITLINK_ERROR_INVALID_INPUT', + NVJITLINK_ERROR_PTX_COMPILE : 'NVJITLINK_ERROR_PTX_COMPILE', + NVJITLINK_ERROR_NVVM_COMPILE : 'NVJITLINK_ERROR_NVVM_COMPILE', + NVJITLINK_ERROR_INTERNAL : 'NVJITLINK_ERROR_INTERNAL' +} + +class nvJitLinkError(Exception): + + def __init__(self, status): + self.status = status + cdef str err = STATUS[status] + 
super(nvJitLinkError, self).__init__(err) + + def __reduce__(self): + return (type(self), (self.status,)) + + +@cython.profile(False) +cdef inline void check_status(int status) nogil: + if status != 0: + with gil: + raise nvJitLinkError(status) + + +############################################################################### +# Wrapper functions +############################################################################### + +cpdef create(intptr_t handle, uint32_t num_options, intptr_t options): + with nogil: + status = nvJitLinkCreate(handle, num_options, options) + check_status(status) + + +cpdef destroy(intptr_t handle): + with nogil: + status = nvJitLinkDestroy(handle) + check_status(status) + + +cpdef add_data(intptr_t handle, int input_type, intptr_t data, size_t size, intptr_t name): + with nogil: + status = nvJitLinkAddData(handle, <_NvJitLinkInputType>input_type, data, size, name) + check_status(status) + + +cpdef add_file(intptr_t handle, int input_type, intptr_t file_name): + with nogil: + status = nvJitLinkAddFile(handle, <_NvJitLinkInputType>input_type, file_name) + check_status(status) + + +cpdef complete(intptr_t handle): + with nogil: + status = nvJitLinkComplete(handle) + check_status(status) + + +cpdef get_linked_cubin_size(intptr_t handle, intptr_t size): + with nogil: + status = nvJitLinkGetLinkedCubinSize(handle, size) + check_status(status) + + +cpdef get_linked_cubin(intptr_t handle, intptr_t cubin): + with nogil: + status = nvJitLinkGetLinkedCubin(handle, cubin) + check_status(status) + + +cpdef get_linked_ptx_size(intptr_t handle, intptr_t size): + with nogil: + status = nvJitLinkGetLinkedPtxSize(handle, size) + check_status(status) + + +cpdef get_linked_ptx(intptr_t handle, intptr_t ptx): + with nogil: + status = nvJitLinkGetLinkedPtx(handle, ptx) + check_status(status) + + +cpdef get_error_log_size(intptr_t handle, intptr_t size): + with nogil: + status = nvJitLinkGetErrorLogSize(handle, size) + check_status(status) + + +cpdef 
get_error_log(intptr_t handle, intptr_t log): + with nogil: + status = nvJitLinkGetErrorLog(handle, log) + check_status(status) + + +cpdef get_info_log_size(intptr_t handle, intptr_t size): + with nogil: + status = nvJitLinkGetInfoLogSize(handle, size) + check_status(status) + + +cpdef get_info_log(intptr_t handle, intptr_t log): + with nogil: + status = nvJitLinkGetInfoLog(handle, log) + check_status(status) \ No newline at end of file diff --git a/cuda_bindings/nvJitLink.pxd b/cuda_bindings/nvJitLink.pxd deleted file mode 100644 index d063002be..000000000 --- a/cuda_bindings/nvJitLink.pxd +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. -# -# SPDX-License-Identifier: Apache-2.0 -# -# This code was automatically generated across versions from 12.0.1 to 12.4.1. Do not modify it directly. - -from libc.stdint cimport intptr_t - -from .cynvJitLink cimport * - - -############################################################################### -# Types -############################################################################### - - - -ctypedef cudaStream_t Stream -ctypedef cudaDataType DataType -ctypedef libraryPropertyType_t LibraryPropertyType - - -############################################################################### -# Enum -############################################################################### - - - - -############################################################################### -# Functions -############################################################################### - -cpdef create(intptr_t handle, uint32_t num_options, intptr_t options) -cpdef destroy(intptr_t handle) -cpdef add_data(nvJitLinkHandle handle, nvJitLinkInputType input_type, intptr_t data, size_t size, intptr_t name) -cpdef add_file(nvJitLinkHandle handle, nvJitLinkInputType input_type, intptr_t file_name) -cpdef complete(nvJitLinkHandle handle) -cpdef get_linked_cubin_size(nvJitLinkHandle handle, intptr_t size) 
-cpdef get_linked_cubin(nvJitLinkHandle handle, intptr_t cubin) -cpdef get_linked_ptx_size(nvJitLinkHandle handle, intptr_t size) -cpdef get_linked_ptx(nvJitLinkHandle handle, intptr_t ptx) -cpdef get_error_log_size(nvJitLinkHandle handle, intptr_t size) -cpdef get_error_log(nvJitLinkHandle handle, intptr_t log) -cpdef get_info_log_size(nvJitLinkHandle handle, intptr_t size) -cpdef get_info_log(nvJitLinkHandle handle, intptr_t log) diff --git a/cuda_bindings/nvJitLink.pyx b/cuda_bindings/nvJitLink.pyx deleted file mode 100644 index 18f4c7545..000000000 --- a/cuda_bindings/nvJitLink.pyx +++ /dev/null @@ -1,138 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. -# -# SPDX-License-Identifier: Apache-2.0 -# -# This code was automatically generated across versions from 12.0.1 to 12.4.1. Do not modify it directly. - -cimport cython # NOQA - -from enum import IntEnum as _IntEnum - - -############################################################################### -# Enum -############################################################################### - - - - -############################################################################### -# Error handling -############################################################################### - -cdef dict STATUS={ - NVJITLINK_SUCCESS : 'NVJITLINK_SUCCESS', - NVJITLINK_ERROR_UNRECOGNIZED_OPTION : 'NVJITLINK_ERROR_UNRECOGNIZED_OPTION', - NVJITLINK_ERROR_MISSING_ARCH : 'NVJITLINK_ERROR_MISSING_ARCH', // -arch=sm_NN option not specified - NVJITLINK_ERROR_INVALID_INPUT : 'NVJITLINK_ERROR_INVALID_INPUT', - NVJITLINK_ERROR_PTX_COMPILE : 'NVJITLINK_ERROR_PTX_COMPILE', - NVJITLINK_ERROR_NVVM_COMPILE : 'NVJITLINK_ERROR_NVVM_COMPILE', - NVJITLINK_ERROR_INTERNAL : 'NVJITLINK_ERROR_INTERNAL', - NVJITLINK_ERROR_THREADPOOL : 'NVJITLINK_ERROR_THREADPOOL', - NVJITLINK_ERROR_UNRECOGNIZED_INPUT : 'NVJITLINK_ERROR_UNRECOGNIZED_INPUT', - NVJITLINK_ERROR_NULL_INPUT : 'NVJITLINK_ERROR_NULL_INPUT', - 
NVJITLINK_ERROR_INCOMPATIBLE_OPTIONS: 'NVJITLINK_ERROR_INCOMPATIBLE_OPTIONS', - NVJITLINK_ERROR_INCORRECT_INPUT_TYPE: 'NVJITLINK_ERROR_INCORRECT_INPUT_TYPE', - NVJITLINK_ERROR_ARCH_MISMATCH : 'NVJITLINK_ERROR_ARCH_MISMATCH', - NVJITLINK_ERROR_OUTDATED_LIBRARY : 'NVJITLINK_ERROR_OUTDATED_LIBRARY', - NVJITLINK_ERROR_MISSING_FATBIN : 'NVJITLINK_ERROR_MISSING_FATBIN' -} - -class nvJitLinkError(Exception): - - def __init__(self, status): - self.status = status - cdef str err = STATUS[status] - super(nvJitLinkError, self).__init__(err) - - def __reduce__(self): - return (type(self), (self.status,)) - - -@cython.profile(False) -cdef inline void check_status(int status) nogil: - if status != 0: - with gil: - raise nvJitLinkError(status) - - -############################################################################### -# Wrapper functions -############################################################################### - -cpdef create(intptr_t handle, uint32_t num_options, intptr_t options): - with nogil: - status = nvJitLinkCreate(handle, num_options, options) - _check_status(status) - - -cpdef destroy(intptr_t handle): - with nogil: - status = nvJitLinkDestroy(handle) - _check_status(status) - - -cpdef add_data(nvJitLinkHandle handle, nvJitLinkInputType input_type, intptr_t data, size_t size, intptr_t name): - with nogil: - status = nvJitLinkAddData(handle, input_type, data, size, name) - _check_status(status) - - -cpdef add_file(nvJitLinkHandle handle, nvJitLinkInputType input_type, intptr_t file_name): - with nogil: - status = nvJitLinkAddFile(handle, input_type, file_name) - _check_status(status) - - -cpdef complete(nvJitLinkHandle handle): - with nogil: - status = nvJitLinkComplete(handle) - _check_status(status) - - -cpdef get_linked_cubin_size(nvJitLinkHandle handle, intptr_t size): - with nogil: - status = nvJitLinkGetLinkedCubinSize(handle, size) - _check_status(status) - - -cpdef get_linked_cubin(nvJitLinkHandle handle, intptr_t cubin): - with nogil: - status = 
nvJitLinkGetLinkedCubin(handle, cubin) - _check_status(status) - - -cpdef get_linked_ptx_size(nvJitLinkHandle handle, intptr_t size): - with nogil: - status = nvJitLinkGetLinkedPtxSize(handle, size) - _check_status(status) - - -cpdef get_linked_ptx(nvJitLinkHandle handle, intptr_t ptx): - with nogil: - status = nvJitLinkGetLinkedPtx(handle, ptx) - _check_status(status) - - -cpdef get_error_log_size(nvJitLinkHandle handle, intptr_t size): - with nogil: - status = nvJitLinkGetErrorLogSize(handle, size) - _check_status(status) - - -cpdef get_error_log(nvJitLinkHandle handle, intptr_t log): - with nogil: - status = nvJitLinkGetErrorLog(handle, log) - _check_status(status) - - -cpdef get_info_log_size(nvJitLinkHandle handle, intptr_t size): - with nogil: - status = nvJitLinkGetInfoLogSize(handle, size) - _check_status(status) - - -cpdef get_info_log(nvJitLinkHandle handle, intptr_t log): - with nogil: - status = nvJitLinkGetInfoLog(handle, log) - _check_status(status) diff --git a/cuda_bindings/setup.py b/cuda_bindings/setup.py index 27b83f946..d7baf018b 100644 --- a/cuda_bindings/setup.py +++ b/cuda_bindings/setup.py @@ -11,6 +11,7 @@ import platform import sys import sysconfig +import atexit from Cython import Tempita from Cython.Build import cythonize @@ -19,6 +20,8 @@ from setuptools.extension import Extension from setuptools.command.build_ext import build_ext import versioneer +import tempfile +import shutil # ---------------------------------------------------------------------- @@ -90,16 +93,13 @@ break if not os.path.exists(path): print(f'Missing header {header}') - print(f'Parsing {library} headers') parser = CParser(header_paths, cache='./cache_{}'.format(library.split('.')[0]) if PARSER_CACHING else None, replace=replace) - if library == 'driver': CUDA_VERSION = parser.defs['macros']['CUDA_VERSION'] if 'CUDA_VERSION' in parser.defs['macros'] else 'Unknown' print(f'Found CUDA_VERSION: {CUDA_VERSION}') - # Combine types with others since they sometimes get 
tangled found_types += {key for key in parser.defs['types']} found_types += {key for key in parser.defs['structs']} @@ -109,16 +109,13 @@ found_types += {key for key in parser.defs['enums']} found_functions += {key for key in parser.defs['functions']} found_values += {key for key in parser.defs['values']} - if len(found_functions) == 0: raise RuntimeError(f'Parser found no functions. Is CUDA_HOME setup correctly? (CUDA_HOME="{CUDA_HOME}")') - # Unwrap struct and union members def unwrapMembers(found_dict): for key in found_dict: members = [var for var, _, _ in found_dict[key]['members']] found_dict[key]['members'] = members - unwrapMembers(found_structs) unwrapMembers(found_unions) @@ -148,7 +145,9 @@ def generate_output(infile, local): os.path.join('cuda', 'bindings'), os.path.join('cuda', 'bindings', '_bindings'), os.path.join('cuda', 'bindings', '_lib'), - os.path.join('cuda', 'bindings', '_lib', 'cyruntime')] + os.path.join('cuda', 'bindings', '_lib', 'cyruntime'), + os.path.join('cuda', 'bindings', '_internal'), + ] input_files = [] for path in path_list: input_files += fetch_input_files(path) @@ -183,6 +182,7 @@ def generate_output(infile, local): # For Setup extensions = [] +new_extensions = [] cmdclass = {} # ---------------------------------------------------------------------- @@ -192,6 +192,7 @@ def prep_extensions(sources): pattern = sources[0] files = glob.glob(pattern) exts = [] + print(include_dirs, library_dirs) for pyx in files: mod_name = pyx.replace(".pyx", "").replace(os.sep, ".").replace("/", ".") exts.append( @@ -208,6 +209,34 @@ def prep_extensions(sources): ) return exts +# new path for the bindings from cybind +def rename_architecture_specific_files(): + if sys.platform == 'linux': + src_files = glob.glob('cuda/bindings/_internal/*_linux.pyx') + elif sys.platform == 'win32': + src_files = glob.glob('cuda/bindings/_internal/*_windows.pyx') + else: + raise RuntimeError(f'platform is unrecognized: {sys.platform}') + dst_files = [] + for src in 
src_files: + # Set up a temporary file; it must be under the cache directory so + # that atomic moves within the same filesystem can be guaranteed + with tempfile.NamedTemporaryFile(delete=False, dir='.') as f: + shutil.copy2(src, f.name) + f_name = f.name + dst = src.replace('_linux', '').replace('_windows', '') + # atomic move with the destination guaranteed to be overwritten + os.replace(f_name, f"./{dst}") + dst_files.append(dst) + +@atexit.register +def cleanup_dst_files(): + pass + # for dst in sources_list: + # try: + # os.remove(dst) + # except FileNotFoundError: + # pass def do_cythonize(extensions): return cythonize( @@ -231,11 +260,20 @@ def do_cythonize(extensions): ["cuda/*.pyx"], # tests ["tests/*.pyx"], + # interal files used by cybind + ['cuda/bindings/_internal/*.pyx'], ] + +rename_architecture_specific_files() + for sources in sources_list: extensions += prep_extensions(sources) +# for sources in new_sources_list: +# new_extensions += prep_extensions(sources) + + # --------------------------------------------------------------------- # Custom build_ext command # Files are build in two steps: @@ -258,14 +296,20 @@ def finalize_options(self): # ---------------------------------------------------------------------- # Setup +package_data=dict.fromkeys( + find_packages(include=["cuda.cuda", "cuda.cuda.*", "cuda.cuda.bindings", "cuda.cuda.bindings._bindings", "cuda.cuda.bindings._lib", "cuda.cuda.bindings._lib.cyruntime", "cuda.cuda.bindings._internal", "tests"]), + ["*.pxd", "*.pyx", "*.py", "*.h", "*.cpp"], + ) + setup( version=versioneer.get_version(), ext_modules=do_cythonize(extensions), - packages=find_packages(include=["cuda.cuda", "cuda.cuda.*", "cuda.cuda.bindings", "cuda.cuda.bindings._bindings", "cuda.cuda.bindings._lib", "cuda.cuda.bindings._lib.cyruntime", "tests"]), + packages=find_packages(include=["cuda.cuda", "cuda.cuda.*", "cuda.cuda.bindings", "cuda.cuda.bindings._bindings", "cuda.cuda.bindings._lib", 
"cuda.cuda.bindings._lib.cyruntime", "cuda.cuda.bindings._internal", "tests"]), package_data=dict.fromkeys( - find_packages(include=["cuda.cuda", "cuda.cuda.*", "cuda.cuda.bindings", "cuda.cuda.bindings._bindings", "cuda.cuda.bindings._lib", "cuda.cuda.bindings._lib.cyruntime", "tests"]), + find_packages(include=["cuda.cuda", "cuda.cuda.*", "cuda.cuda.bindings", "cuda.cuda.bindings._bindings", "cuda.cuda.bindings._lib", "cuda.cuda.bindings._lib.cyruntime", "cuda.cuda.bindings._internal", "tests"]), ["*.pxd", "*.pyx", "*.py", "*.h", "*.cpp"], ), + cmdclass=cmdclass, zip_safe=False, -) +) \ No newline at end of file diff --git a/cuda_bindings/tests/test_nvJitLink.py b/cuda_bindings/tests/test_nvjitlink.py similarity index 62% rename from cuda_bindings/tests/test_nvJitLink.py rename to cuda_bindings/tests/test_nvjitlink.py index f566ae7c6..37129e4a2 100644 --- a/cuda_bindings/tests/test_nvJitLink.py +++ b/cuda_bindings/tests/test_nvjitlink.py @@ -1,44 +1,46 @@ import pytest -from cuda import nvJitLink +from cuda.bindings import nvjitlink + +dir(nvjitlink) def test_create_no_arch_error(): # nvjitlink expects at least the architecture to be specified. 
with pytest.raises(RuntimeError, match="NVJITLINK_ERROR_MISSING_ARCH error"): - nvJitLink.create() + nvjitlink.create() def test_invalid_arch_error(): # sm_XX is not a valid architecture with pytest.raises(RuntimeError, match="NVJITLINK_ERROR_UNRECOGNIZED_OPTION error"): - nvJitLink.create("-arch=sm_XX") + nvjitlink.create("-arch=sm_XX") def test_unrecognized_option_error(): with pytest.raises(RuntimeError, match="NVJITLINK_ERROR_UNRECOGNIZED_OPTION error"): - nvJitLink.create("-fictitious_option") + nvjitlink.create("-fictitious_option") def test_invalid_option_type_error(): with pytest.raises(TypeError, match="Expecting only strings"): - nvJitLink.create("-arch", 53) + nvjitlink.create("-arch", 53) def test_create_and_destroy(): - handle = nvJitLink.create("-arch=sm_53") + handle = nvjitlink.create("-arch=sm_53") assert handle != 0 - nvJitLink.destroy(handle) + nvjitlink.destroy(handle) def test_complete_empty(): - handle = nvJitLink.create("-arch=sm_75") - nvJitLink.complete(handle) - nvJitLink.destroy(handle) + handle = nvjitlink.create("-arch=sm_75") + nvjitlink.complete(handle) + nvjitlink.destroy(handle) @pytest.mark.parametrize( "input_file,input_type", [ - ("device_functions_cubin", nvJitLink.InputType.CUBIN), + ("device_functions_cubin", nvjitlink.InputType.CUBIN), ("device_functions_fatbin", InputType.FATBIN), ("device_functions_ptx", InputType.PTX), ("device_functions_object", InputType.OBJECT), @@ -48,9 +50,9 @@ def test_complete_empty(): def test_add_file(input_file, input_type, gpu_arch_flag, request): filename, data = request.getfixturevalue(input_file) - handle = nvJitLink.create(gpu_arch_flag) - nvJitLink.add_data(handle, input_type.value, data, filename) - nvJitLink.destroy(handle) + handle = nvjitlink.create(gpu_arch_flag) + nvjitlink.add_data(handle, input_type.value, data, filename) + nvjitlink.destroy(handle) # We test the LTO input case separately as it requires the `-lto` flag. 
The @@ -59,20 +61,20 @@ def test_add_file(input_file, input_type, gpu_arch_flag, request): def test_add_file_lto(device_functions_ltoir_object, gpu_arch_flag): filename, data = device_functions_ltoir_object - handle = nvJitLink.create(gpu_arch_flag, "-lto") - nvJitLink.add_data(handle, InputType.OBJECT.value, data, filename) - nvJitLink.destroy(handle) + handle = nvjitlink.create(gpu_arch_flag, "-lto") + nvjitlink.add_data(handle, InputType.OBJECT.value, data, filename) + nvjitlink.destroy(handle) def test_get_error_log(undefined_extern_cubin, gpu_arch_flag): - handle = nvJitLink.create(gpu_arch_flag) + handle = nvjitlink.create(gpu_arch_flag) filename, data = undefined_extern_cubin input_type = InputType.CUBIN.value - nvJitLink.add_data(handle, input_type, data, filename) + nvjitlink.add_data(handle, input_type, data, filename) with pytest.raises(RuntimeError): - nvJitLink.complete(handle) - error_log = nvJitLink.get_error_log(handle) - nvJitLink.destroy(handle) + nvjitlink.complete(handle) + error_log = nvjitlink.get_error_log(handle) + nvjitlink.destroy(handle) assert ( "Undefined reference to '_Z5undefff' " "in 'undefined_extern.cubin'" in error_log @@ -80,25 +82,25 @@ def test_get_error_log(undefined_extern_cubin, gpu_arch_flag): def test_get_info_log(device_functions_cubin, gpu_arch_flag): - handle = nvJitLink.create(gpu_arch_flag) + handle = nvjitlink.create(gpu_arch_flag) filename, data = device_functions_cubin input_type = InputType.CUBIN.value - nvJitLink.add_data(handle, input_type, data, filename) - nvJitLink.complete(handle) - info_log = nvJitLink.get_info_log(handle) - nvJitLink.destroy(handle) + nvjitlink.add_data(handle, input_type, data, filename) + nvjitlink.complete(handle) + info_log = nvjitlink.get_info_log(handle) + nvjitlink.destroy(handle) # Info log is empty assert "" == info_log def test_get_linked_cubin(device_functions_cubin, gpu_arch_flag): - handle = nvJitLink.create(gpu_arch_flag) + handle = nvjitlink.create(gpu_arch_flag) filename, 
data = device_functions_cubin input_type = InputType.CUBIN.value - nvJitLink.add_data(handle, input_type, data, filename) - nvJitLink.complete(handle) - cubin = nvJitLink.get_linked_cubin(handle) - nvJitLink.destroy(handle) + nvjitlink.add_data(handle, input_type, data, filename) + nvjitlink.complete(handle) + cubin = nvjitlink.get_linked_cubin(handle) + nvjitlink.destroy(handle) # Just check we got something that looks like an ELF assert cubin[:4] == b"\x7fELF" @@ -107,13 +109,13 @@ def test_get_linked_cubin(device_functions_cubin, gpu_arch_flag): def test_get_linked_cubin_link_not_complete_error( device_functions_cubin, gpu_arch_flag ): - handle = nvJitLink.create(gpu_arch_flag) + handle = nvjitlink.create(gpu_arch_flag) filename, data = device_functions_cubin input_type = InputType.CUBIN.value - nvJitLink.add_data(handle, input_type, data, filename) + nvjitlink.add_data(handle, input_type, data, filename) with pytest.raises(RuntimeError, match="NVJITLINK_ERROR_INTERNAL error"): - nvJitLink.get_linked_cubin(handle) - nvJitLink.destroy(handle) + nvjitlink.get_linked_cubin(handle) + nvjitlink.destroy(handle) def test_get_linked_cubin_from_lto(device_functions_ltoir_object, gpu_arch_flag): @@ -123,11 +125,11 @@ def test_get_linked_cubin_from_lto(device_functions_ltoir_object, gpu_arch_flag) # LTO is requested. So we need to use the OBJECT input type, and the linker # retrieves the LTO IR from it because we passed the -lto flag. 
input_type = InputType.OBJECT.value - handle = nvJitLink.create(gpu_arch_flag, "-lto") - nvJitLink.add_data(handle, input_type, data, filename) - nvJitLink.complete(handle) - cubin = nvJitLink.get_linked_cubin(handle) - nvJitLink.destroy(handle) + handle = nvjitlink.create(gpu_arch_flag, "-lto") + nvjitlink.add_data(handle, input_type, data, filename) + nvjitlink.complete(handle) + cubin = nvjitlink.get_linked_cubin(handle) + nvjitlink.destroy(handle) # Just check we got something that looks like an ELF assert cubin[:4] == b"\x7fELF" @@ -140,23 +142,23 @@ def test_get_linked_ptx_from_lto(device_functions_ltoir_object, gpu_arch_flag): # LTO is requested. So we need to use the OBJECT input type, and the linker # retrieves the LTO IR from it because we passed the -lto flag. input_type = InputType.OBJECT.value - handle = nvJitLink.create(gpu_arch_flag, "-lto", "-ptx") - nvJitLink.add_data(handle, input_type, data, filename) - nvJitLink.complete(handle) - nvJitLink.get_linked_ptx(handle) - nvJitLink.destroy(handle) + handle = nvjitlink.create(gpu_arch_flag, "-lto", "-ptx") + nvjitlink.add_data(handle, input_type, data, filename) + nvjitlink.complete(handle) + nvjitlink.get_linked_ptx(handle) + nvjitlink.destroy(handle) def test_get_linked_ptx_link_not_complete_error( device_functions_ltoir_object, gpu_arch_flag ): - handle = nvJitLink.create(gpu_arch_flag, "-lto", "-ptx") + handle = nvjitlink.create(gpu_arch_flag, "-lto", "-ptx") filename, data = device_functions_ltoir_object input_type = InputType.OBJECT.value - nvJitLink.add_data(handle, input_type, data, filename) + nvjitlink.add_data(handle, input_type, data, filename) with pytest.raises(RuntimeError, match="NVJITLINK_ERROR_INTERNAL error"): - nvJitLink.get_linked_ptx(handle) - nvJitLink.destroy(handle) + nvjitlink.get_linked_ptx(handle) + nvjitlink.destroy(handle) def test_package_version(): From 8c4029f5cf5f7f8f9cdd79eef2b22a19fc2d07cd Mon Sep 17 00:00:00 2001 From: ksimpson Date: Wed, 16 Oct 2024 16:30:19 -0700 
Subject: [PATCH 06/34] working --- .../bindings/_internal/nvjitlink_windows.pyx | 4 +-- cuda_bindings/cuda/bindings/cynvjitlink.pxd | 2 +- cuda_bindings/cuda/bindings/cynvjitlink.pyx | 1 + cuda_bindings/setup.py | 36 +++++++++---------- 4 files changed, 20 insertions(+), 23 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx index 43852441e..5cac180f3 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx @@ -6,14 +6,14 @@ from libc.stdint cimport intptr_t -from .utils cimport get_nvjitlink_dso_version_suffix +from utils cimport get_nvjitlink_dso_version_suffix import os import site import win32api -from .utils import FunctionNotFoundError, NotSupportedError +from utils import FunctionNotFoundError, NotSupportedError ############################################################################### diff --git a/cuda_bindings/cuda/bindings/cynvjitlink.pxd b/cuda_bindings/cuda/bindings/cynvjitlink.pxd index 2913111f0..3dcc1d4ec 100644 --- a/cuda_bindings/cuda/bindings/cynvjitlink.pxd +++ b/cuda_bindings/cuda/bindings/cynvjitlink.pxd @@ -5,7 +5,7 @@ # This code was automatically generated across versions from 12.0.76 to 12.6.77. Do not modify it directly. -from libc.stdint cimport intptr_t, uint32_t +from libc.stdint cimport uint32_t ############################################################################### diff --git a/cuda_bindings/cuda/bindings/cynvjitlink.pyx b/cuda_bindings/cuda/bindings/cynvjitlink.pyx index a6703cc0f..5e882524e 100644 --- a/cuda_bindings/cuda/bindings/cynvjitlink.pyx +++ b/cuda_bindings/cuda/bindings/cynvjitlink.pyx @@ -5,6 +5,7 @@ # This code was automatically generated across versions from 12.0.76 to 12.6.77. Do not modify it directly. 
from ._internal cimport nvjitlink as _nvjitlink +from libc.stdint cimport uint32_t ############################################################################### diff --git a/cuda_bindings/setup.py b/cuda_bindings/setup.py index d7baf018b..8ffb50d63 100644 --- a/cuda_bindings/setup.py +++ b/cuda_bindings/setup.py @@ -210,11 +210,11 @@ def prep_extensions(sources): return exts # new path for the bindings from cybind -def rename_architecture_specific_files(): +def rename_architecture_specific_files(path): if sys.platform == 'linux': - src_files = glob.glob('cuda/bindings/_internal/*_linux.pyx') + src_files = glob.glob(os.path.join(path, '*_linux.pyx')) elif sys.platform == 'win32': - src_files = glob.glob('cuda/bindings/_internal/*_windows.pyx') + src_files = glob.glob(os.path.join(path, '*_windows.pyx')) else: raise RuntimeError(f'platform is unrecognized: {sys.platform}') dst_files = [] @@ -232,11 +232,13 @@ def rename_architecture_specific_files(): @atexit.register def cleanup_dst_files(): pass - # for dst in sources_list: - # try: - # os.remove(dst) - # except FileNotFoundError: - # pass + for dst in architechture_specific_files_dir: + try: + os.remove(dst) + except FileNotFoundError: + pass + +architechture_specific_files_dir = 'cuda/bindings/_internal/' def do_cythonize(extensions): return cythonize( @@ -247,6 +249,7 @@ def do_cythonize(extensions): ), **extra_cythonize_kwargs) +rename_architecture_specific_files(architechture_specific_files_dir) sources_list = [ # private @@ -260,20 +263,18 @@ def do_cythonize(extensions): ["cuda/*.pyx"], # tests ["tests/*.pyx"], - # interal files used by cybind - ['cuda/bindings/_internal/*.pyx'], + + # interal files used by cybind. 
We on + ['cuda/bindings/_internal/nvjitlink.pyx'], + ['cuda/bindings/_internal/utils.pyx'], + ] -rename_architecture_specific_files() for sources in sources_list: extensions += prep_extensions(sources) -# for sources in new_sources_list: -# new_extensions += prep_extensions(sources) - - # --------------------------------------------------------------------- # Custom build_ext command # Files are build in two steps: @@ -296,11 +297,6 @@ def finalize_options(self): # ---------------------------------------------------------------------- # Setup -package_data=dict.fromkeys( - find_packages(include=["cuda.cuda", "cuda.cuda.*", "cuda.cuda.bindings", "cuda.cuda.bindings._bindings", "cuda.cuda.bindings._lib", "cuda.cuda.bindings._lib.cyruntime", "cuda.cuda.bindings._internal", "tests"]), - ["*.pxd", "*.pyx", "*.py", "*.h", "*.cpp"], - ) - setup( version=versioneer.get_version(), ext_modules=do_cythonize(extensions), From 8852a9252441fe101e8e754643387aa1358b004a Mon Sep 17 00:00:00 2001 From: ksimpson Date: Tue, 15 Oct 2024 12:47:51 -0700 Subject: [PATCH 07/34] rebase --- .../cuda/bindings/_bindings/nvJitLink.pxd | 26 ++ .../bindings/_bindings/nvJitLink_linux.pyx | 382 +++++++++++++++++ .../bindings/_bindings/nvJitLink_windows.pyx | 393 ++++++++++++++++++ cuda_bindings/cynvJitLink.pxd | 48 +++ cuda_bindings/cynvJitLink.pyx | 63 +++ cuda_bindings/nvJitLink.pxd | 46 ++ cuda_bindings/nvJitLink.pyx | 138 ++++++ cuda_bindings/tests/test_nvJitLink.py | 3 + 8 files changed, 1099 insertions(+) create mode 100644 cuda_bindings/cuda/bindings/_bindings/nvJitLink.pxd create mode 100644 cuda_bindings/cuda/bindings/_bindings/nvJitLink_linux.pyx create mode 100644 cuda_bindings/cuda/bindings/_bindings/nvJitLink_windows.pyx create mode 100644 cuda_bindings/cynvJitLink.pxd create mode 100644 cuda_bindings/cynvJitLink.pyx create mode 100644 cuda_bindings/nvJitLink.pxd create mode 100644 cuda_bindings/nvJitLink.pyx create mode 100644 cuda_bindings/tests/test_nvJitLink.py diff --git 
a/cuda_bindings/cuda/bindings/_bindings/nvJitLink.pxd b/cuda_bindings/cuda/bindings/_bindings/nvJitLink.pxd new file mode 100644 index 000000000..dca128a0e --- /dev/null +++ b/cuda_bindings/cuda/bindings/_bindings/nvJitLink.pxd @@ -0,0 +1,26 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: Apache-2.0 +# +# This code was automatically generated across versions from 12.0.1 to 12.4.1. Do not modify it directly. + +from ..cynvJitLink cimport * + + +############################################################################### +# Wrapper functions +############################################################################### + +cdef nvJitLinkResult _nvJitLinkCreate(nvJitLinkHandle* handle, uint32_t numOptions, const char** options) except* nogil +cdef nvJitLinkResult _nvJitLinkDestroy(nvJitLinkHandle* handle) except* nogil +cdef nvJitLinkResult _nvJitLinkAddData(nvJitLinkHandle handle, nvJitLinkInputType inputType, const void* data, size_t size, const char* name) except* nogil +cdef nvJitLinkResult _nvJitLinkAddFile(nvJitLinkHandle handle, nvJitLinkInputType inputType, const char* fileName) except* nogil +cdef nvJitLinkResult _nvJitLinkComplete(nvJitLinkHandle handle) except* nogil +cdef nvJitLinkResult _nvJitLinkGetLinkedCubinSize(nvJitLinkHandle handle, size_t* size) except* nogil +cdef nvJitLinkResult _nvJitLinkGetLinkedCubin(nvJitLinkHandle handle, void* cubin) except* nogil +cdef nvJitLinkResult _nvJitLinkGetLinkedPtxSize(nvJitLinkHandle handle, size_t* size) except* nogil +cdef nvJitLinkResult _nvJitLinkGetLinkedPtx(nvJitLinkHandle handle, char* ptx) except* nogil +cdef nvJitLinkResult _nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t* size) except* nogil +cdef nvJitLinkResult _nvJitLinkGetErrorLog(nvJitLinkHandle handle, char* log) except* nogil +cdef nvJitLinkResult _nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t* size) except* nogil +cdef nvJitLinkResult 
_nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) except* nogil diff --git a/cuda_bindings/cuda/bindings/_bindings/nvJitLink_linux.pyx b/cuda_bindings/cuda/bindings/_bindings/nvJitLink_linux.pyx new file mode 100644 index 000000000..2fc6ca625 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_bindings/nvJitLink_linux.pyx @@ -0,0 +1,382 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: Apache-2.0 +# +# This code was automatically generated across versions from 12.0.1 to 12.4.1. Do not modify it directly. + +from libc.stdint cimport intptr_t + +from .utils cimport get_nvJitLink_dso_version_suffix + +from .utils import FunctionNotFoundError, NotSupportedError + + +############################################################################### +# Extern +############################################################################### + +cdef extern from "" nogil: + void* dlopen(const char*, int) + char* dlerror() + void* dlsym(void*, const char*) + int dlclose(void*) + + enum: + RTLD_LAZY + RTLD_NOW + RTLD_GLOBAL + RTLD_LOCAL + + const void* RTLD_DEFAULT 'RTLD_DEFAULT' + + +############################################################################### +# Wrapper init +############################################################################### + +cdef bint __py_nvJitLink_init = False +cdef void* __cuDriverGetVersion = NULL + +cdef void* __nvJitLinkCreate = NULL +cdef void* __nvJitLinkDestroy = NULL +cdef void* __nvJitLinkAddData = NULL +cdef void* __nvJitLinkAddFile = NULL +cdef void* __nvJitLinkComplete = NULL +cdef void* __nvJitLinkGetLinkedCubinSize = NULL +cdef void* __nvJitLinkGetLinkedCubin = NULL +cdef void* __nvJitLinkGetLinkedPtxSize = NULL +cdef void* __nvJitLinkGetLinkedPtx = NULL +cdef void* __nvJitLinkGetErrorLogSize = NULL +cdef void* __nvJitLinkGetErrorLog = NULL +cdef void* __nvJitLinkGetInfoLogSize = NULL +cdef void* __nvJitLinkGetInfoLog = NULL + + +cdef void* load_library(const int 
driver_ver) except* with gil: + cdef void* handle + for suffix in get_nvJitLink_dso_version_suffix(driver_ver): + so_name = "libnvJitLink.so" + (f".{suffix}" if suffix else suffix) + handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL) + if handle != NULL: + break + else: + err_msg = dlerror() + raise RuntimeError(f'Failed to dlopen libnvJitLink ({err_msg.decode()})') + return handle + + +cdef int _check_or_init_nvJitLink() except -1 nogil: + global __py_nvJitLink_init + if __py_nvJitLink_init: + return 0 + + # Load driver to check version + cdef void* handle = NULL + handle = dlopen('libcuda.so.1', RTLD_NOW | RTLD_GLOBAL) + if handle == NULL: + with gil: + err_msg = dlerror() + raise NotSupportedError(f'CUDA driver is not found ({err_msg.decode()})') + global __cuDriverGetVersion + if __cuDriverGetVersion == NULL: + __cuDriverGetVersion = dlsym(handle, "cuDriverGetVersion") + if __cuDriverGetVersion == NULL: + with gil: + raise RuntimeError('something went wrong') + cdef int err, driver_ver + err = (__cuDriverGetVersion)(&driver_ver) + if err != 0: + with gil: + raise RuntimeError('something went wrong') + #dlclose(handle) + handle = NULL + + # Load function + global __nvJitLinkCreate + __nvJitLinkCreate = dlsym(RTLD_DEFAULT, 'nvJitLinkCreate') + if __nvJitLinkCreate == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvJitLinkCreate = dlsym(handle, 'nvJitLinkCreate') + + global __nvJitLinkDestroy + __nvJitLinkDestroy = dlsym(RTLD_DEFAULT, 'nvJitLinkDestroy') + if __nvJitLinkDestroy == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvJitLinkDestroy = dlsym(handle, 'nvJitLinkDestroy') + + global __nvJitLinkAddData + __nvJitLinkAddData = dlsym(RTLD_DEFAULT, 'nvJitLinkAddData') + if __nvJitLinkAddData == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvJitLinkAddData = dlsym(handle, 'nvJitLinkAddData') + + global __nvJitLinkAddFile + __nvJitLinkAddFile = dlsym(RTLD_DEFAULT, 'nvJitLinkAddFile') + if 
__nvJitLinkAddFile == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvJitLinkAddFile = dlsym(handle, 'nvJitLinkAddFile') + + global __nvJitLinkComplete + __nvJitLinkComplete = dlsym(RTLD_DEFAULT, 'nvJitLinkComplete') + if __nvJitLinkComplete == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvJitLinkComplete = dlsym(handle, 'nvJitLinkComplete') + + global __nvJitLinkGetLinkedCubinSize + __nvJitLinkGetLinkedCubinSize = dlsym(RTLD_DEFAULT, 'nvJitLinkGetLinkedCubinSize') + if __nvJitLinkGetLinkedCubinSize == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvJitLinkGetLinkedCubinSize = dlsym(handle, 'nvJitLinkGetLinkedCubinSize') + + global __nvJitLinkGetLinkedCubin + __nvJitLinkGetLinkedCubin = dlsym(RTLD_DEFAULT, 'nvJitLinkGetLinkedCubin') + if __nvJitLinkGetLinkedCubin == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvJitLinkGetLinkedCubin = dlsym(handle, 'nvJitLinkGetLinkedCubin') + + global __nvJitLinkGetLinkedPtxSize + __nvJitLinkGetLinkedPtxSize = dlsym(RTLD_DEFAULT, 'nvJitLinkGetLinkedPtxSize') + if __nvJitLinkGetLinkedPtxSize == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvJitLinkGetLinkedPtxSize = dlsym(handle, 'nvJitLinkGetLinkedPtxSize') + + global __nvJitLinkGetLinkedPtx + __nvJitLinkGetLinkedPtx = dlsym(RTLD_DEFAULT, 'nvJitLinkGetLinkedPtx') + if __nvJitLinkGetLinkedPtx == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvJitLinkGetLinkedPtx = dlsym(handle, 'nvJitLinkGetLinkedPtx') + + global __nvJitLinkGetErrorLogSize + __nvJitLinkGetErrorLogSize = dlsym(RTLD_DEFAULT, 'nvJitLinkGetErrorLogSize') + if __nvJitLinkGetErrorLogSize == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvJitLinkGetErrorLogSize = dlsym(handle, 'nvJitLinkGetErrorLogSize') + + global __nvJitLinkGetErrorLog + __nvJitLinkGetErrorLog = dlsym(RTLD_DEFAULT, 'nvJitLinkGetErrorLog') + if __nvJitLinkGetErrorLog == NULL: + if handle == NULL: + handle 
= load_library(driver_ver) + __nvJitLinkGetErrorLog = dlsym(handle, 'nvJitLinkGetErrorLog') + + global __nvJitLinkGetInfoLogSize + __nvJitLinkGetInfoLogSize = dlsym(RTLD_DEFAULT, 'nvJitLinkGetInfoLogSize') + if __nvJitLinkGetInfoLogSize == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvJitLinkGetInfoLogSize = dlsym(handle, 'nvJitLinkGetInfoLogSize') + + global __nvJitLinkGetInfoLog + __nvJitLinkGetInfoLog = dlsym(RTLD_DEFAULT, 'nvJitLinkGetInfoLog') + if __nvJitLinkGetInfoLog == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvJitLinkGetInfoLog = dlsym(handle, 'nvJitLinkGetInfoLog') + + __py_nvJitLink_init = True + return 0 + + +cdef dict func_ptrs = None + + +cpdef dict _inspect_function_pointers(): + global func_ptrs + if func_ptrs is not None: + return func_ptrs + + _check_or_init_nvJitLink() + cdef dict data = {} + + global __nvJitLinkCreate + data["__nvJitLinkCreate"] = __nvJitLinkCreate + + global __nvJitLinkDestroy + data["__nvJitLinkDestroy"] = __nvJitLinkDestroy + + global __nvJitLinkAddData + data["__nvJitLinkAddData"] = __nvJitLinkAddData + + global __nvJitLinkAddFile + data["__nvJitLinkAddFile"] = __nvJitLinkAddFile + + global __nvJitLinkComplete + data["__nvJitLinkComplete"] = __nvJitLinkComplete + + global __nvJitLinkGetLinkedCubinSize + data["__nvJitLinkGetLinkedCubinSize"] = __nvJitLinkGetLinkedCubinSize + + global __nvJitLinkGetLinkedCubin + data["__nvJitLinkGetLinkedCubin"] = __nvJitLinkGetLinkedCubin + + global __nvJitLinkGetLinkedPtxSize + data["__nvJitLinkGetLinkedPtxSize"] = __nvJitLinkGetLinkedPtxSize + + global __nvJitLinkGetLinkedPtx + data["__nvJitLinkGetLinkedPtx"] = __nvJitLinkGetLinkedPtx + + global __nvJitLinkGetErrorLogSize + data["__nvJitLinkGetErrorLogSize"] = __nvJitLinkGetErrorLogSize + + global __nvJitLinkGetErrorLog + data["__nvJitLinkGetErrorLog"] = __nvJitLinkGetErrorLog + + global __nvJitLinkGetInfoLogSize + data["__nvJitLinkGetInfoLogSize"] = __nvJitLinkGetInfoLogSize + + global 
__nvJitLinkGetInfoLog + data["__nvJitLinkGetInfoLog"] = __nvJitLinkGetInfoLog + + func_ptrs = data + return data + + +cpdef _inspect_function_pointer(str name): + global func_ptrs + if func_ptrs is None: + func_ptrs = _inspect_function_pointers() + return func_ptrs[name] + + +############################################################################### +# Wrapper functions +############################################################################### + +cdef nvJitLinkResult _nvJitLinkCreate(nvJitLinkHandle* handle, uint32_t numOptions, const char** options) except* nogil: + global __nvJitLinkCreate + _check_or_init_nvJitLink() + if __nvJitLinkCreate == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkCreate is not found") + return (__nvJitLinkCreate)( + handle, numOptions, options) + + +cdef nvJitLinkResult _nvJitLinkDestroy(nvJitLinkHandle* handle) except* nogil: + global __nvJitLinkDestroy + _check_or_init_nvJitLink() + if __nvJitLinkDestroy == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkDestroy is not found") + return (__nvJitLinkDestroy)( + handle) + + +cdef nvJitLinkResult _nvJitLinkAddData(nvJitLinkHandle handle, nvJitLinkInputType inputType, const void* data, size_t size, const char* name) except* nogil: + global __nvJitLinkAddData + _check_or_init_nvJitLink() + if __nvJitLinkAddData == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkAddData is not found") + return (__nvJitLinkAddData)( + handle, inputType, data, size, name) + + +cdef nvJitLinkResult _nvJitLinkAddFile(nvJitLinkHandle handle, nvJitLinkInputType inputType, const char* fileName) except* nogil: + global __nvJitLinkAddFile + _check_or_init_nvJitLink() + if __nvJitLinkAddFile == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkAddFile is not found") + return (__nvJitLinkAddFile)( + handle, inputType, fileName) + + +cdef nvJitLinkResult _nvJitLinkComplete(nvJitLinkHandle handle) except* nogil: + global 
__nvJitLinkComplete + _check_or_init_nvJitLink() + if __nvJitLinkComplete == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkComplete is not found") + return (__nvJitLinkComplete)( + handle) + + +cdef nvJitLinkResult _nvJitLinkGetLinkedCubinSize(nvJitLinkHandle handle, size_t* size) except* nogil: + global __nvJitLinkGetLinkedCubinSize + _check_or_init_nvJitLink() + if __nvJitLinkGetLinkedCubinSize == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkGetLinkedCubinSize is not found") + return (__nvJitLinkGetLinkedCubinSize)( + handle, size) + + +cdef nvJitLinkResult _nvJitLinkGetLinkedCubin(nvJitLinkHandle handle, void* cubin) except* nogil: + global __nvJitLinkGetLinkedCubin + _check_or_init_nvJitLink() + if __nvJitLinkGetLinkedCubin == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkGetLinkedCubin is not found") + return (__nvJitLinkGetLinkedCubin)( + handle, cubin) + + +cdef nvJitLinkResult _nvJitLinkGetLinkedPtxSize(nvJitLinkHandle handle, size_t* size) except* nogil: + global __nvJitLinkGetLinkedPtxSize + _check_or_init_nvJitLink() + if __nvJitLinkGetLinkedPtxSize == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkGetLinkedPtxSize is not found") + return (__nvJitLinkGetLinkedPtxSize)( + handle, size) + + +cdef nvJitLinkResult _nvJitLinkGetLinkedPtx(nvJitLinkHandle handle, char* ptx) except* nogil: + global __nvJitLinkGetLinkedPtx + _check_or_init_nvJitLink() + if __nvJitLinkGetLinkedPtx == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkGetLinkedPtx is not found") + return (__nvJitLinkGetLinkedPtx)( + handle, ptx) + + +cdef nvJitLinkResult _nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t* size) except* nogil: + global __nvJitLinkGetErrorLogSize + _check_or_init_nvJitLink() + if __nvJitLinkGetErrorLogSize == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkGetErrorLogSize is not found") + return (__nvJitLinkGetErrorLogSize)( + handle, size) + + 
+cdef nvJitLinkResult _nvJitLinkGetErrorLog(nvJitLinkHandle handle, char* log) except* nogil: + global __nvJitLinkGetErrorLog + _check_or_init_nvJitLink() + if __nvJitLinkGetErrorLog == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkGetErrorLog is not found") + return (__nvJitLinkGetErrorLog)( + handle, log) + + +cdef nvJitLinkResult _nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t* size) except* nogil: + global __nvJitLinkGetInfoLogSize + _check_or_init_nvJitLink() + if __nvJitLinkGetInfoLogSize == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkGetInfoLogSize is not found") + return (__nvJitLinkGetInfoLogSize)( + handle, size) + + +cdef nvJitLinkResult _nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) except* nogil: + global __nvJitLinkGetInfoLog + _check_or_init_nvJitLink() + if __nvJitLinkGetInfoLog == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkGetInfoLog is not found") + return (__nvJitLinkGetInfoLog)( + handle, log) diff --git a/cuda_bindings/cuda/bindings/_bindings/nvJitLink_windows.pyx b/cuda_bindings/cuda/bindings/_bindings/nvJitLink_windows.pyx new file mode 100644 index 000000000..8856b59ca --- /dev/null +++ b/cuda_bindings/cuda/bindings/_bindings/nvJitLink_windows.pyx @@ -0,0 +1,393 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: Apache-2.0 +# +# This code was automatically generated across versions from 12.0.1 to 12.4.1. Do not modify it directly. 
+ +from libc.stdint cimport intptr_t + +from .utils cimport get_nvJitLink_dso_version_suffix + +import os +import site + +import win32api + +from .utils import FunctionNotFoundError, NotSupportedError + + +############################################################################### +# Wrapper init +############################################################################### + +LOAD_LIBRARY_SEARCH_SYSTEM32 = 0x00000800 +LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000 +LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100 +cdef bint __py_nvJitLink_init = False +cdef void* __cuDriverGetVersion = NULL + +cdef void* __nvJitLinkCreate = NULL +cdef void* __nvJitLinkDestroy = NULL +cdef void* __nvJitLinkAddData = NULL +cdef void* __nvJitLinkAddFile = NULL +cdef void* __nvJitLinkComplete = NULL +cdef void* __nvJitLinkGetLinkedCubinSize = NULL +cdef void* __nvJitLinkGetLinkedCubin = NULL +cdef void* __nvJitLinkGetLinkedPtxSize = NULL +cdef void* __nvJitLinkGetLinkedPtx = NULL +cdef void* __nvJitLinkGetErrorLogSize = NULL +cdef void* __nvJitLinkGetErrorLog = NULL +cdef void* __nvJitLinkGetInfoLogSize = NULL +cdef void* __nvJitLinkGetInfoLog = NULL + + +cdef inline list get_site_packages(): + return [site.getusersitepackages()] + site.getsitepackages() + + +cdef load_library(const int driver_ver): + handle = 0 + + for suffix in get_nvJitLink_dso_version_suffix(driver_ver): + if len(suffix) == 0: + continue + dll_name = f"nvJitLink64_{suffix}.dll" + + # First check if the DLL has been loaded by 3rd parties + try: + handle = win32api.GetModuleHandle(dll_name) + except: + pass + else: + break + + # Next, check if DLLs are installed via pip + for sp in get_site_packages(): + mod_path = os.path.join(sp, "nvidia", "nvJitLink", "bin") + if not os.path.isdir(mod_path): + continue + os.add_dll_directory(mod_path) + try: + handle = win32api.LoadLibraryEx( + # Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path... 
+ os.path.join(mod_path, dll_name), + 0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR) + except: + pass + else: + break + + # Finally, try default search + try: + handle = win32api.LoadLibrary(dll_name) + except: + pass + else: + break + else: + raise RuntimeError('Failed to load nvJitLink') + + assert handle != 0 + return handle + + +cdef int _check_or_init_nvJitLink() except -1 nogil: + global __py_nvJitLink_init + if __py_nvJitLink_init: + return 0 + + cdef int err, driver_ver + with gil: + # Load driver to check version + try: + handle = win32api.LoadLibraryEx("nvcuda.dll", 0, LOAD_LIBRARY_SEARCH_SYSTEM32) + except Exception as e: + raise NotSupportedError(f'CUDA driver is not found ({e})') + global __cuDriverGetVersion + if __cuDriverGetVersion == NULL: + __cuDriverGetVersion = win32api.GetProcAddress(handle, 'cuDriverGetVersion') + if __cuDriverGetVersion == NULL: + raise RuntimeError('something went wrong') + err = (__cuDriverGetVersion)(&driver_ver) + if err != 0: + raise RuntimeError('something went wrong') + + # Load library + handle = load_library(driver_ver) + + # Load function + global __nvJitLinkCreate + try: + __nvJitLinkCreate = win32api.GetProcAddress(handle, 'nvJitLinkCreate') + except: + pass + + global __nvJitLinkDestroy + try: + __nvJitLinkDestroy = win32api.GetProcAddress(handle, 'nvJitLinkDestroy') + except: + pass + + global __nvJitLinkAddData + try: + __nvJitLinkAddData = win32api.GetProcAddress(handle, 'nvJitLinkAddData') + except: + pass + + global __nvJitLinkAddFile + try: + __nvJitLinkAddFile = win32api.GetProcAddress(handle, 'nvJitLinkAddFile') + except: + pass + + global __nvJitLinkComplete + try: + __nvJitLinkComplete = win32api.GetProcAddress(handle, 'nvJitLinkComplete') + except: + pass + + global __nvJitLinkGetLinkedCubinSize + try: + __nvJitLinkGetLinkedCubinSize = win32api.GetProcAddress(handle, 'nvJitLinkGetLinkedCubinSize') + except: + pass + + global __nvJitLinkGetLinkedCubin + try: + 
__nvJitLinkGetLinkedCubin = win32api.GetProcAddress(handle, 'nvJitLinkGetLinkedCubin') + except: + pass + + global __nvJitLinkGetLinkedPtxSize + try: + __nvJitLinkGetLinkedPtxSize = win32api.GetProcAddress(handle, 'nvJitLinkGetLinkedPtxSize') + except: + pass + + global __nvJitLinkGetLinkedPtx + try: + __nvJitLinkGetLinkedPtx = win32api.GetProcAddress(handle, 'nvJitLinkGetLinkedPtx') + except: + pass + + global __nvJitLinkGetErrorLogSize + try: + __nvJitLinkGetErrorLogSize = win32api.GetProcAddress(handle, 'nvJitLinkGetErrorLogSize') + except: + pass + + global __nvJitLinkGetErrorLog + try: + __nvJitLinkGetErrorLog = win32api.GetProcAddress(handle, 'nvJitLinkGetErrorLog') + except: + pass + + global __nvJitLinkGetInfoLogSize + try: + __nvJitLinkGetInfoLogSize = win32api.GetProcAddress(handle, 'nvJitLinkGetInfoLogSize') + except: + pass + + global __nvJitLinkGetInfoLog + try: + __nvJitLinkGetInfoLog = win32api.GetProcAddress(handle, 'nvJitLinkGetInfoLog') + except: + pass + + __py_nvJitLink_init = True + return 0 + + +cdef dict func_ptrs = None + + +cpdef dict _inspect_function_pointers(): + global func_ptrs + if func_ptrs is not None: + return func_ptrs + + _check_or_init_nvJitLink() + cdef dict data = {} + + global __nvJitLinkCreate + data["__nvJitLinkCreate"] = __nvJitLinkCreate + + global __nvJitLinkDestroy + data["__nvJitLinkDestroy"] = __nvJitLinkDestroy + + global __nvJitLinkAddData + data["__nvJitLinkAddData"] = __nvJitLinkAddData + + global __nvJitLinkAddFile + data["__nvJitLinkAddFile"] = __nvJitLinkAddFile + + global __nvJitLinkComplete + data["__nvJitLinkComplete"] = __nvJitLinkComplete + + global __nvJitLinkGetLinkedCubinSize + data["__nvJitLinkGetLinkedCubinSize"] = __nvJitLinkGetLinkedCubinSize + + global __nvJitLinkGetLinkedCubin + data["__nvJitLinkGetLinkedCubin"] = __nvJitLinkGetLinkedCubin + + global __nvJitLinkGetLinkedPtxSize + data["__nvJitLinkGetLinkedPtxSize"] = __nvJitLinkGetLinkedPtxSize + + global __nvJitLinkGetLinkedPtx + 
data["__nvJitLinkGetLinkedPtx"] = __nvJitLinkGetLinkedPtx + + global __nvJitLinkGetErrorLogSize + data["__nvJitLinkGetErrorLogSize"] = __nvJitLinkGetErrorLogSize + + global __nvJitLinkGetErrorLog + data["__nvJitLinkGetErrorLog"] = __nvJitLinkGetErrorLog + + global __nvJitLinkGetInfoLogSize + data["__nvJitLinkGetInfoLogSize"] = __nvJitLinkGetInfoLogSize + + global __nvJitLinkGetInfoLog + data["__nvJitLinkGetInfoLog"] = __nvJitLinkGetInfoLog + + func_ptrs = data + return data + + +cpdef _inspect_function_pointer(str name): + global func_ptrs + if func_ptrs is None: + func_ptrs = _inspect_function_pointers() + return func_ptrs[name] + + +############################################################################### +# Wrapper functions +############################################################################### + +cdef nvJitLinkResult _nvJitLinkCreate(nvJitLinkHandle* handle, uint32_t numOptions, const char** options) except* nogil: + global __nvJitLinkCreate + _check_or_init_nvJitLink() + if __nvJitLinkCreate == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkCreate is not found") + return (__nvJitLinkCreate)( + handle, numOptions, options) + + +cdef nvJitLinkResult _nvJitLinkDestroy(nvJitLinkHandle* handle) except* nogil: + global __nvJitLinkDestroy + _check_or_init_nvJitLink() + if __nvJitLinkDestroy == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkDestroy is not found") + return (__nvJitLinkDestroy)( + handle) + + +cdef nvJitLinkResult _nvJitLinkAddData(nvJitLinkHandle handle, nvJitLinkInputType inputType, const void* data, size_t size, const char* name) except* nogil: + global __nvJitLinkAddData + _check_or_init_nvJitLink() + if __nvJitLinkAddData == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkAddData is not found") + return (__nvJitLinkAddData)( + handle, inputType, data, size, name) + + +cdef nvJitLinkResult _nvJitLinkAddFile(nvJitLinkHandle handle, nvJitLinkInputType inputType, const char* 
fileName) except* nogil: + global __nvJitLinkAddFile + _check_or_init_nvJitLink() + if __nvJitLinkAddFile == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkAddFile is not found") + return (__nvJitLinkAddFile)( + handle, inputType, fileName) + + +cdef nvJitLinkResult _nvJitLinkComplete(nvJitLinkHandle handle) except* nogil: + global __nvJitLinkComplete + _check_or_init_nvJitLink() + if __nvJitLinkComplete == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkComplete is not found") + return (__nvJitLinkComplete)( + handle) + + +cdef nvJitLinkResult _nvJitLinkGetLinkedCubinSize(nvJitLinkHandle handle, size_t* size) except* nogil: + global __nvJitLinkGetLinkedCubinSize + _check_or_init_nvJitLink() + if __nvJitLinkGetLinkedCubinSize == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkGetLinkedCubinSize is not found") + return (__nvJitLinkGetLinkedCubinSize)( + handle, size) + + +cdef nvJitLinkResult _nvJitLinkGetLinkedCubin(nvJitLinkHandle handle, void* cubin) except* nogil: + global __nvJitLinkGetLinkedCubin + _check_or_init_nvJitLink() + if __nvJitLinkGetLinkedCubin == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkGetLinkedCubin is not found") + return (__nvJitLinkGetLinkedCubin)( + handle, cubin) + + +cdef nvJitLinkResult _nvJitLinkGetLinkedPtxSize(nvJitLinkHandle handle, size_t* size) except* nogil: + global __nvJitLinkGetLinkedPtxSize + _check_or_init_nvJitLink() + if __nvJitLinkGetLinkedPtxSize == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkGetLinkedPtxSize is not found") + return (__nvJitLinkGetLinkedPtxSize)( + handle, size) + + +cdef nvJitLinkResult _nvJitLinkGetLinkedPtx(nvJitLinkHandle handle, char* ptx) except* nogil: + global __nvJitLinkGetLinkedPtx + _check_or_init_nvJitLink() + if __nvJitLinkGetLinkedPtx == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkGetLinkedPtx is not found") + return (__nvJitLinkGetLinkedPtx)( + handle, ptx) + + 
+cdef nvJitLinkResult _nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t* size) except* nogil: + global __nvJitLinkGetErrorLogSize + _check_or_init_nvJitLink() + if __nvJitLinkGetErrorLogSize == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkGetErrorLogSize is not found") + return (__nvJitLinkGetErrorLogSize)( + handle, size) + + +cdef nvJitLinkResult _nvJitLinkGetErrorLog(nvJitLinkHandle handle, char* log) except* nogil: + global __nvJitLinkGetErrorLog + _check_or_init_nvJitLink() + if __nvJitLinkGetErrorLog == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkGetErrorLog is not found") + return (__nvJitLinkGetErrorLog)( + handle, log) + + +cdef nvJitLinkResult _nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t* size) except* nogil: + global __nvJitLinkGetInfoLogSize + _check_or_init_nvJitLink() + if __nvJitLinkGetInfoLogSize == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkGetInfoLogSize is not found") + return (__nvJitLinkGetInfoLogSize)( + handle, size) + + +cdef nvJitLinkResult _nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) except* nogil: + global __nvJitLinkGetInfoLog + _check_or_init_nvJitLink() + if __nvJitLinkGetInfoLog == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkGetInfoLog is not found") + return (__nvJitLinkGetInfoLog)( + handle, log) diff --git a/cuda_bindings/cynvJitLink.pxd b/cuda_bindings/cynvJitLink.pxd new file mode 100644 index 000000000..ed440c0b3 --- /dev/null +++ b/cuda_bindings/cynvJitLink.pxd @@ -0,0 +1,48 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: Apache-2.0 +# +# This code was automatically generated across versions from 12.0.1 to 12.4.1. Do not modify it directly. + + +from libc.stdint cimport int64_t + + +############################################################################### +# Types (structs, enums, ...) 
+############################################################################### + +# enums + + + +# types +cdef extern from *: + """ + #include + #include + #include + """ + ctypedef void* cudaStream_t 'cudaStream_t' + + + + + +############################################################################### +# Functions +############################################################################### + +cdef nvJitLinkResult nvJitLinkCreate(nvJitLinkHandle* handle, uint32_t numOptions, const char** options) except* nogil +cdef nvJitLinkResult nvJitLinkDestroy(nvJitLinkHandle* handle) except* nogil +cdef nvJitLinkResult nvJitLinkAddData(nvJitLinkHandle handle, nvJitLinkInputType inputType, const void* data, size_t size, const char* name) except* nogil +cdef nvJitLinkResult nvJitLinkAddFile(nvJitLinkHandle handle, nvJitLinkInputType inputType, const char* fileName) except* nogil +cdef nvJitLinkResult nvJitLinkComplete(nvJitLinkHandle handle) except* nogil +cdef nvJitLinkResult nvJitLinkGetLinkedCubinSize(nvJitLinkHandle handle, size_t* size) except* nogil +cdef nvJitLinkResult nvJitLinkGetLinkedCubin(nvJitLinkHandle handle, void* cubin) except* nogil +cdef nvJitLinkResult nvJitLinkGetLinkedPtxSize(nvJitLinkHandle handle, size_t* size) except* nogil +cdef nvJitLinkResult nvJitLinkGetLinkedPtx(nvJitLinkHandle handle, char* ptx) except* nogil +cdef nvJitLinkResult nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t* size) except* nogil +cdef nvJitLinkResult nvJitLinkGetErrorLog(nvJitLinkHandle handle, char* log) except* nogil +cdef nvJitLinkResult nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t* size) except* nogil +cdef nvJitLinkResult nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) except* nogil diff --git a/cuda_bindings/cynvJitLink.pyx b/cuda_bindings/cynvJitLink.pyx new file mode 100644 index 000000000..65d3f9840 --- /dev/null +++ b/cuda_bindings/cynvJitLink.pyx @@ -0,0 +1,63 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. 
ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: Apache-2.0 +# +# This code was automatically generated across versions from 12.0.1 to 12.4.1. Do not modify it directly. + +from ._internal cimport nvJitLink as _nvJitLink + + +############################################################################### +# Wrapper functions +############################################################################### + +cdef nvJitLinkResult nvJitLinkCreate(nvJitLinkHandle* handle, uint32_t numOptions, const char** options) except* nogil: + return _nvJitLink._nvJitLinkCreate(handle, numOptions, options) + + +cdef nvJitLinkResult nvJitLinkDestroy(nvJitLinkHandle* handle) except* nogil: + return _nvJitLink._nvJitLinkDestroy(handle) + + +cdef nvJitLinkResult nvJitLinkAddData(nvJitLinkHandle handle, nvJitLinkInputType inputType, const void* data, size_t size, const char* name) except* nogil: + return _nvJitLink._nvJitLinkAddData(handle, inputType, data, size, name) + + +cdef nvJitLinkResult nvJitLinkAddFile(nvJitLinkHandle handle, nvJitLinkInputType inputType, const char* fileName) except* nogil: + return _nvJitLink._nvJitLinkAddFile(handle, inputType, fileName) + + +cdef nvJitLinkResult nvJitLinkComplete(nvJitLinkHandle handle) except* nogil: + return _nvJitLink._nvJitLinkComplete(handle) + + +cdef nvJitLinkResult nvJitLinkGetLinkedCubinSize(nvJitLinkHandle handle, size_t* size) except* nogil: + return _nvJitLink._nvJitLinkGetLinkedCubinSize(handle, size) + + +cdef nvJitLinkResult nvJitLinkGetLinkedCubin(nvJitLinkHandle handle, void* cubin) except* nogil: + return _nvJitLink._nvJitLinkGetLinkedCubin(handle, cubin) + + +cdef nvJitLinkResult nvJitLinkGetLinkedPtxSize(nvJitLinkHandle handle, size_t* size) except* nogil: + return _nvJitLink._nvJitLinkGetLinkedPtxSize(handle, size) + + +cdef nvJitLinkResult nvJitLinkGetLinkedPtx(nvJitLinkHandle handle, char* ptx) except* nogil: + return _nvJitLink._nvJitLinkGetLinkedPtx(handle, ptx) + + +cdef nvJitLinkResult 
nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t* size) except* nogil: + return _nvJitLink._nvJitLinkGetErrorLogSize(handle, size) + + +cdef nvJitLinkResult nvJitLinkGetErrorLog(nvJitLinkHandle handle, char* log) except* nogil: + return _nvJitLink._nvJitLinkGetErrorLog(handle, log) + + +cdef nvJitLinkResult nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t* size) except* nogil: + return _nvJitLink._nvJitLinkGetInfoLogSize(handle, size) + + +cdef nvJitLinkResult nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) except* nogil: + return _nvJitLink._nvJitLinkGetInfoLog(handle, log) diff --git a/cuda_bindings/nvJitLink.pxd b/cuda_bindings/nvJitLink.pxd new file mode 100644 index 000000000..d063002be --- /dev/null +++ b/cuda_bindings/nvJitLink.pxd @@ -0,0 +1,46 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: Apache-2.0 +# +# This code was automatically generated across versions from 12.0.1 to 12.4.1. Do not modify it directly. 
+ +from libc.stdint cimport intptr_t + +from .cynvJitLink cimport * + + +############################################################################### +# Types +############################################################################### + + + +ctypedef cudaStream_t Stream +ctypedef cudaDataType DataType +ctypedef libraryPropertyType_t LibraryPropertyType + + +############################################################################### +# Enum +############################################################################### + + + + +############################################################################### +# Functions +############################################################################### + +cpdef create(intptr_t handle, uint32_t num_options, intptr_t options) +cpdef destroy(intptr_t handle) +cpdef add_data(nvJitLinkHandle handle, nvJitLinkInputType input_type, intptr_t data, size_t size, intptr_t name) +cpdef add_file(nvJitLinkHandle handle, nvJitLinkInputType input_type, intptr_t file_name) +cpdef complete(nvJitLinkHandle handle) +cpdef get_linked_cubin_size(nvJitLinkHandle handle, intptr_t size) +cpdef get_linked_cubin(nvJitLinkHandle handle, intptr_t cubin) +cpdef get_linked_ptx_size(nvJitLinkHandle handle, intptr_t size) +cpdef get_linked_ptx(nvJitLinkHandle handle, intptr_t ptx) +cpdef get_error_log_size(nvJitLinkHandle handle, intptr_t size) +cpdef get_error_log(nvJitLinkHandle handle, intptr_t log) +cpdef get_info_log_size(nvJitLinkHandle handle, intptr_t size) +cpdef get_info_log(nvJitLinkHandle handle, intptr_t log) diff --git a/cuda_bindings/nvJitLink.pyx b/cuda_bindings/nvJitLink.pyx new file mode 100644 index 000000000..18f4c7545 --- /dev/null +++ b/cuda_bindings/nvJitLink.pyx @@ -0,0 +1,138 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: Apache-2.0 +# +# This code was automatically generated across versions from 12.0.1 to 12.4.1. Do not modify it directly. 
+ +cimport cython # NOQA + +from enum import IntEnum as _IntEnum + + +############################################################################### +# Enum +############################################################################### + + + + +############################################################################### +# Error handling +############################################################################### + +cdef dict STATUS={ + NVJITLINK_SUCCESS : 'NVJITLINK_SUCCESS', + NVJITLINK_ERROR_UNRECOGNIZED_OPTION : 'NVJITLINK_ERROR_UNRECOGNIZED_OPTION', + NVJITLINK_ERROR_MISSING_ARCH : 'NVJITLINK_ERROR_MISSING_ARCH', // -arch=sm_NN option not specified + NVJITLINK_ERROR_INVALID_INPUT : 'NVJITLINK_ERROR_INVALID_INPUT', + NVJITLINK_ERROR_PTX_COMPILE : 'NVJITLINK_ERROR_PTX_COMPILE', + NVJITLINK_ERROR_NVVM_COMPILE : 'NVJITLINK_ERROR_NVVM_COMPILE', + NVJITLINK_ERROR_INTERNAL : 'NVJITLINK_ERROR_INTERNAL', + NVJITLINK_ERROR_THREADPOOL : 'NVJITLINK_ERROR_THREADPOOL', + NVJITLINK_ERROR_UNRECOGNIZED_INPUT : 'NVJITLINK_ERROR_UNRECOGNIZED_INPUT', + NVJITLINK_ERROR_NULL_INPUT : 'NVJITLINK_ERROR_NULL_INPUT', + NVJITLINK_ERROR_INCOMPATIBLE_OPTIONS: 'NVJITLINK_ERROR_INCOMPATIBLE_OPTIONS', + NVJITLINK_ERROR_INCORRECT_INPUT_TYPE: 'NVJITLINK_ERROR_INCORRECT_INPUT_TYPE', + NVJITLINK_ERROR_ARCH_MISMATCH : 'NVJITLINK_ERROR_ARCH_MISMATCH', + NVJITLINK_ERROR_OUTDATED_LIBRARY : 'NVJITLINK_ERROR_OUTDATED_LIBRARY', + NVJITLINK_ERROR_MISSING_FATBIN : 'NVJITLINK_ERROR_MISSING_FATBIN' +} + +class nvJitLinkError(Exception): + + def __init__(self, status): + self.status = status + cdef str err = STATUS[status] + super(nvJitLinkError, self).__init__(err) + + def __reduce__(self): + return (type(self), (self.status,)) + + +@cython.profile(False) +cdef inline void check_status(int status) nogil: + if status != 0: + with gil: + raise nvJitLinkError(status) + + +############################################################################### +# Wrapper functions 
+############################################################################### + +cpdef create(intptr_t handle, uint32_t num_options, intptr_t options): + with nogil: + status = nvJitLinkCreate(handle, num_options, options) + _check_status(status) + + +cpdef destroy(intptr_t handle): + with nogil: + status = nvJitLinkDestroy(handle) + _check_status(status) + + +cpdef add_data(nvJitLinkHandle handle, nvJitLinkInputType input_type, intptr_t data, size_t size, intptr_t name): + with nogil: + status = nvJitLinkAddData(handle, input_type, data, size, name) + _check_status(status) + + +cpdef add_file(nvJitLinkHandle handle, nvJitLinkInputType input_type, intptr_t file_name): + with nogil: + status = nvJitLinkAddFile(handle, input_type, file_name) + _check_status(status) + + +cpdef complete(nvJitLinkHandle handle): + with nogil: + status = nvJitLinkComplete(handle) + _check_status(status) + + +cpdef get_linked_cubin_size(nvJitLinkHandle handle, intptr_t size): + with nogil: + status = nvJitLinkGetLinkedCubinSize(handle, size) + _check_status(status) + + +cpdef get_linked_cubin(nvJitLinkHandle handle, intptr_t cubin): + with nogil: + status = nvJitLinkGetLinkedCubin(handle, cubin) + _check_status(status) + + +cpdef get_linked_ptx_size(nvJitLinkHandle handle, intptr_t size): + with nogil: + status = nvJitLinkGetLinkedPtxSize(handle, size) + _check_status(status) + + +cpdef get_linked_ptx(nvJitLinkHandle handle, intptr_t ptx): + with nogil: + status = nvJitLinkGetLinkedPtx(handle, ptx) + _check_status(status) + + +cpdef get_error_log_size(nvJitLinkHandle handle, intptr_t size): + with nogil: + status = nvJitLinkGetErrorLogSize(handle, size) + _check_status(status) + + +cpdef get_error_log(nvJitLinkHandle handle, intptr_t log): + with nogil: + status = nvJitLinkGetErrorLog(handle, log) + _check_status(status) + + +cpdef get_info_log_size(nvJitLinkHandle handle, intptr_t size): + with nogil: + status = nvJitLinkGetInfoLogSize(handle, size) + _check_status(status) + + +cpdef 
get_info_log(nvJitLinkHandle handle, intptr_t log): + with nogil: + status = nvJitLinkGetInfoLog(handle, log) + _check_status(status) diff --git a/cuda_bindings/tests/test_nvJitLink.py b/cuda_bindings/tests/test_nvJitLink.py new file mode 100644 index 000000000..7ced5ff38 --- /dev/null +++ b/cuda_bindings/tests/test_nvJitLink.py @@ -0,0 +1,3 @@ +import pytest +from cuda import nvJitLink + From fb1198a371ec5f361666715f5a3acce9ef159533 Mon Sep 17 00:00:00 2001 From: ksimpson Date: Tue, 15 Oct 2024 10:11:07 -0700 Subject: [PATCH 08/34] add test file --- cuda_bindings/tests/test_nvJitLink.py | 161 ++++++++++++++++++++++++++ 1 file changed, 161 insertions(+) diff --git a/cuda_bindings/tests/test_nvJitLink.py b/cuda_bindings/tests/test_nvJitLink.py index 7ced5ff38..f566ae7c6 100644 --- a/cuda_bindings/tests/test_nvJitLink.py +++ b/cuda_bindings/tests/test_nvJitLink.py @@ -1,3 +1,164 @@ import pytest from cuda import nvJitLink +def test_create_no_arch_error(): + # nvjitlink expects at least the architecture to be specified. 
+ with pytest.raises(RuntimeError, match="NVJITLINK_ERROR_MISSING_ARCH error"): + nvJitLink.create() + + +def test_invalid_arch_error(): + # sm_XX is not a valid architecture + with pytest.raises(RuntimeError, match="NVJITLINK_ERROR_UNRECOGNIZED_OPTION error"): + nvJitLink.create("-arch=sm_XX") + + +def test_unrecognized_option_error(): + with pytest.raises(RuntimeError, match="NVJITLINK_ERROR_UNRECOGNIZED_OPTION error"): + nvJitLink.create("-fictitious_option") + + +def test_invalid_option_type_error(): + with pytest.raises(TypeError, match="Expecting only strings"): + nvJitLink.create("-arch", 53) + + +def test_create_and_destroy(): + handle = nvJitLink.create("-arch=sm_53") + assert handle != 0 + nvJitLink.destroy(handle) + + +def test_complete_empty(): + handle = nvJitLink.create("-arch=sm_75") + nvJitLink.complete(handle) + nvJitLink.destroy(handle) + + +@pytest.mark.parametrize( + "input_file,input_type", + [ + ("device_functions_cubin", nvJitLink.InputType.CUBIN), + ("device_functions_fatbin", InputType.FATBIN), + ("device_functions_ptx", InputType.PTX), + ("device_functions_object", InputType.OBJECT), + ("device_functions_archive", InputType.LIBRARY), + ], +) +def test_add_file(input_file, input_type, gpu_arch_flag, request): + filename, data = request.getfixturevalue(input_file) + + handle = nvJitLink.create(gpu_arch_flag) + nvJitLink.add_data(handle, input_type.value, data, filename) + nvJitLink.destroy(handle) + + +# We test the LTO input case separately as it requires the `-lto` flag. The +# OBJECT input type is used because the LTO-IR container is packaged in an ELF +# object when produced by NVCC. 
+def test_add_file_lto(device_functions_ltoir_object, gpu_arch_flag): + filename, data = device_functions_ltoir_object + + handle = nvJitLink.create(gpu_arch_flag, "-lto") + nvJitLink.add_data(handle, InputType.OBJECT.value, data, filename) + nvJitLink.destroy(handle) + + +def test_get_error_log(undefined_extern_cubin, gpu_arch_flag): + handle = nvJitLink.create(gpu_arch_flag) + filename, data = undefined_extern_cubin + input_type = InputType.CUBIN.value + nvJitLink.add_data(handle, input_type, data, filename) + with pytest.raises(RuntimeError): + nvJitLink.complete(handle) + error_log = nvJitLink.get_error_log(handle) + nvJitLink.destroy(handle) + assert ( + "Undefined reference to '_Z5undefff' " + "in 'undefined_extern.cubin'" in error_log + ) + + +def test_get_info_log(device_functions_cubin, gpu_arch_flag): + handle = nvJitLink.create(gpu_arch_flag) + filename, data = device_functions_cubin + input_type = InputType.CUBIN.value + nvJitLink.add_data(handle, input_type, data, filename) + nvJitLink.complete(handle) + info_log = nvJitLink.get_info_log(handle) + nvJitLink.destroy(handle) + # Info log is empty + assert "" == info_log + + +def test_get_linked_cubin(device_functions_cubin, gpu_arch_flag): + handle = nvJitLink.create(gpu_arch_flag) + filename, data = device_functions_cubin + input_type = InputType.CUBIN.value + nvJitLink.add_data(handle, input_type, data, filename) + nvJitLink.complete(handle) + cubin = nvJitLink.get_linked_cubin(handle) + nvJitLink.destroy(handle) + + # Just check we got something that looks like an ELF + assert cubin[:4] == b"\x7fELF" + + +def test_get_linked_cubin_link_not_complete_error( + device_functions_cubin, gpu_arch_flag +): + handle = nvJitLink.create(gpu_arch_flag) + filename, data = device_functions_cubin + input_type = InputType.CUBIN.value + nvJitLink.add_data(handle, input_type, data, filename) + with pytest.raises(RuntimeError, match="NVJITLINK_ERROR_INTERNAL error"): + nvJitLink.get_linked_cubin(handle) + 
nvJitLink.destroy(handle) + + +def test_get_linked_cubin_from_lto(device_functions_ltoir_object, gpu_arch_flag): + filename, data = device_functions_ltoir_object + # device_functions_ltoir_object is a host object containing a fatbin + # containing an LTOIR container, because that is what NVCC produces when + # LTO is requested. So we need to use the OBJECT input type, and the linker + # retrieves the LTO IR from it because we passed the -lto flag. + input_type = InputType.OBJECT.value + handle = nvJitLink.create(gpu_arch_flag, "-lto") + nvJitLink.add_data(handle, input_type, data, filename) + nvJitLink.complete(handle) + cubin = nvJitLink.get_linked_cubin(handle) + nvJitLink.destroy(handle) + + # Just check we got something that looks like an ELF + assert cubin[:4] == b"\x7fELF" + + +def test_get_linked_ptx_from_lto(device_functions_ltoir_object, gpu_arch_flag): + filename, data = device_functions_ltoir_object + # device_functions_ltoir_object is a host object containing a fatbin + # containing an LTOIR container, because that is what NVCC produces when + # LTO is requested. So we need to use the OBJECT input type, and the linker + # retrieves the LTO IR from it because we passed the -lto flag. 
+ input_type = InputType.OBJECT.value + handle = nvJitLink.create(gpu_arch_flag, "-lto", "-ptx") + nvJitLink.add_data(handle, input_type, data, filename) + nvJitLink.complete(handle) + nvJitLink.get_linked_ptx(handle) + nvJitLink.destroy(handle) + + +def test_get_linked_ptx_link_not_complete_error( + device_functions_ltoir_object, gpu_arch_flag +): + handle = nvJitLink.create(gpu_arch_flag, "-lto", "-ptx") + filename, data = device_functions_ltoir_object + input_type = InputType.OBJECT.value + nvJitLink.add_data(handle, input_type, data, filename) + with pytest.raises(RuntimeError, match="NVJITLINK_ERROR_INTERNAL error"): + nvJitLink.get_linked_ptx(handle) + nvJitLink.destroy(handle) + + +def test_package_version(): + assert pynvjitlink.__version__ is not None + assert len(str(pynvjitlink.__version__)) > 0 \ No newline at end of file From 2e4955f7a2ca7ebb4b67a99f397b78a6950fdcfb Mon Sep 17 00:00:00 2001 From: ksimpson Date: Wed, 16 Oct 2024 16:46:51 -0700 Subject: [PATCH 09/34] rebase --- .../cuda/bindings/_bindings/nvJitLink.pxd | 26 -- .../bindings/_bindings/nvJitLink_linux.pyx | 382 ----------------- .../bindings/_bindings/nvJitLink_windows.pyx | 393 ------------------ .../bindings/_internal/nvjitlink_windows.pyx | 8 + cuda_bindings/cuda/bindings/cynvjitlink.pxd | 4 + cuda_bindings/cuda/bindings/cynvjitlink.pyx | 3 + cuda_bindings/cynvJitLink.pxd | 48 --- cuda_bindings/cynvJitLink.pyx | 63 --- cuda_bindings/nvJitLink.pxd | 46 -- cuda_bindings/nvJitLink.pyx | 138 ------ cuda_bindings/setup.py | 14 +- cuda_bindings/tests/test_nvJitLink.py | 164 -------- 12 files changed, 27 insertions(+), 1262 deletions(-) delete mode 100644 cuda_bindings/cuda/bindings/_bindings/nvJitLink.pxd delete mode 100644 cuda_bindings/cuda/bindings/_bindings/nvJitLink_linux.pyx delete mode 100644 cuda_bindings/cuda/bindings/_bindings/nvJitLink_windows.pyx delete mode 100644 cuda_bindings/cynvJitLink.pxd delete mode 100644 cuda_bindings/cynvJitLink.pyx delete mode 100644 
cuda_bindings/nvJitLink.pxd delete mode 100644 cuda_bindings/nvJitLink.pyx delete mode 100644 cuda_bindings/tests/test_nvJitLink.py diff --git a/cuda_bindings/cuda/bindings/_bindings/nvJitLink.pxd b/cuda_bindings/cuda/bindings/_bindings/nvJitLink.pxd deleted file mode 100644 index dca128a0e..000000000 --- a/cuda_bindings/cuda/bindings/_bindings/nvJitLink.pxd +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. -# -# SPDX-License-Identifier: Apache-2.0 -# -# This code was automatically generated across versions from 12.0.1 to 12.4.1. Do not modify it directly. - -from ..cynvJitLink cimport * - - -############################################################################### -# Wrapper functions -############################################################################### - -cdef nvJitLinkResult _nvJitLinkCreate(nvJitLinkHandle* handle, uint32_t numOptions, const char** options) except* nogil -cdef nvJitLinkResult _nvJitLinkDestroy(nvJitLinkHandle* handle) except* nogil -cdef nvJitLinkResult _nvJitLinkAddData(nvJitLinkHandle handle, nvJitLinkInputType inputType, const void* data, size_t size, const char* name) except* nogil -cdef nvJitLinkResult _nvJitLinkAddFile(nvJitLinkHandle handle, nvJitLinkInputType inputType, const char* fileName) except* nogil -cdef nvJitLinkResult _nvJitLinkComplete(nvJitLinkHandle handle) except* nogil -cdef nvJitLinkResult _nvJitLinkGetLinkedCubinSize(nvJitLinkHandle handle, size_t* size) except* nogil -cdef nvJitLinkResult _nvJitLinkGetLinkedCubin(nvJitLinkHandle handle, void* cubin) except* nogil -cdef nvJitLinkResult _nvJitLinkGetLinkedPtxSize(nvJitLinkHandle handle, size_t* size) except* nogil -cdef nvJitLinkResult _nvJitLinkGetLinkedPtx(nvJitLinkHandle handle, char* ptx) except* nogil -cdef nvJitLinkResult _nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t* size) except* nogil -cdef nvJitLinkResult _nvJitLinkGetErrorLog(nvJitLinkHandle handle, char* log) except* nogil 
-cdef nvJitLinkResult _nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t* size) except* nogil -cdef nvJitLinkResult _nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) except* nogil diff --git a/cuda_bindings/cuda/bindings/_bindings/nvJitLink_linux.pyx b/cuda_bindings/cuda/bindings/_bindings/nvJitLink_linux.pyx deleted file mode 100644 index 2fc6ca625..000000000 --- a/cuda_bindings/cuda/bindings/_bindings/nvJitLink_linux.pyx +++ /dev/null @@ -1,382 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. -# -# SPDX-License-Identifier: Apache-2.0 -# -# This code was automatically generated across versions from 12.0.1 to 12.4.1. Do not modify it directly. - -from libc.stdint cimport intptr_t - -from .utils cimport get_nvJitLink_dso_version_suffix - -from .utils import FunctionNotFoundError, NotSupportedError - - -############################################################################### -# Extern -############################################################################### - -cdef extern from "" nogil: - void* dlopen(const char*, int) - char* dlerror() - void* dlsym(void*, const char*) - int dlclose(void*) - - enum: - RTLD_LAZY - RTLD_NOW - RTLD_GLOBAL - RTLD_LOCAL - - const void* RTLD_DEFAULT 'RTLD_DEFAULT' - - -############################################################################### -# Wrapper init -############################################################################### - -cdef bint __py_nvJitLink_init = False -cdef void* __cuDriverGetVersion = NULL - -cdef void* __nvJitLinkCreate = NULL -cdef void* __nvJitLinkDestroy = NULL -cdef void* __nvJitLinkAddData = NULL -cdef void* __nvJitLinkAddFile = NULL -cdef void* __nvJitLinkComplete = NULL -cdef void* __nvJitLinkGetLinkedCubinSize = NULL -cdef void* __nvJitLinkGetLinkedCubin = NULL -cdef void* __nvJitLinkGetLinkedPtxSize = NULL -cdef void* __nvJitLinkGetLinkedPtx = NULL -cdef void* __nvJitLinkGetErrorLogSize = NULL -cdef void* __nvJitLinkGetErrorLog = NULL 
-cdef void* __nvJitLinkGetInfoLogSize = NULL -cdef void* __nvJitLinkGetInfoLog = NULL - - -cdef void* load_library(const int driver_ver) except* with gil: - cdef void* handle - for suffix in get_nvJitLink_dso_version_suffix(driver_ver): - so_name = "libnvJitLink.so" + (f".{suffix}" if suffix else suffix) - handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL) - if handle != NULL: - break - else: - err_msg = dlerror() - raise RuntimeError(f'Failed to dlopen libnvJitLink ({err_msg.decode()})') - return handle - - -cdef int _check_or_init_nvJitLink() except -1 nogil: - global __py_nvJitLink_init - if __py_nvJitLink_init: - return 0 - - # Load driver to check version - cdef void* handle = NULL - handle = dlopen('libcuda.so.1', RTLD_NOW | RTLD_GLOBAL) - if handle == NULL: - with gil: - err_msg = dlerror() - raise NotSupportedError(f'CUDA driver is not found ({err_msg.decode()})') - global __cuDriverGetVersion - if __cuDriverGetVersion == NULL: - __cuDriverGetVersion = dlsym(handle, "cuDriverGetVersion") - if __cuDriverGetVersion == NULL: - with gil: - raise RuntimeError('something went wrong') - cdef int err, driver_ver - err = (__cuDriverGetVersion)(&driver_ver) - if err != 0: - with gil: - raise RuntimeError('something went wrong') - #dlclose(handle) - handle = NULL - - # Load function - global __nvJitLinkCreate - __nvJitLinkCreate = dlsym(RTLD_DEFAULT, 'nvJitLinkCreate') - if __nvJitLinkCreate == NULL: - if handle == NULL: - handle = load_library(driver_ver) - __nvJitLinkCreate = dlsym(handle, 'nvJitLinkCreate') - - global __nvJitLinkDestroy - __nvJitLinkDestroy = dlsym(RTLD_DEFAULT, 'nvJitLinkDestroy') - if __nvJitLinkDestroy == NULL: - if handle == NULL: - handle = load_library(driver_ver) - __nvJitLinkDestroy = dlsym(handle, 'nvJitLinkDestroy') - - global __nvJitLinkAddData - __nvJitLinkAddData = dlsym(RTLD_DEFAULT, 'nvJitLinkAddData') - if __nvJitLinkAddData == NULL: - if handle == NULL: - handle = load_library(driver_ver) - __nvJitLinkAddData = dlsym(handle, 
'nvJitLinkAddData') - - global __nvJitLinkAddFile - __nvJitLinkAddFile = dlsym(RTLD_DEFAULT, 'nvJitLinkAddFile') - if __nvJitLinkAddFile == NULL: - if handle == NULL: - handle = load_library(driver_ver) - __nvJitLinkAddFile = dlsym(handle, 'nvJitLinkAddFile') - - global __nvJitLinkComplete - __nvJitLinkComplete = dlsym(RTLD_DEFAULT, 'nvJitLinkComplete') - if __nvJitLinkComplete == NULL: - if handle == NULL: - handle = load_library(driver_ver) - __nvJitLinkComplete = dlsym(handle, 'nvJitLinkComplete') - - global __nvJitLinkGetLinkedCubinSize - __nvJitLinkGetLinkedCubinSize = dlsym(RTLD_DEFAULT, 'nvJitLinkGetLinkedCubinSize') - if __nvJitLinkGetLinkedCubinSize == NULL: - if handle == NULL: - handle = load_library(driver_ver) - __nvJitLinkGetLinkedCubinSize = dlsym(handle, 'nvJitLinkGetLinkedCubinSize') - - global __nvJitLinkGetLinkedCubin - __nvJitLinkGetLinkedCubin = dlsym(RTLD_DEFAULT, 'nvJitLinkGetLinkedCubin') - if __nvJitLinkGetLinkedCubin == NULL: - if handle == NULL: - handle = load_library(driver_ver) - __nvJitLinkGetLinkedCubin = dlsym(handle, 'nvJitLinkGetLinkedCubin') - - global __nvJitLinkGetLinkedPtxSize - __nvJitLinkGetLinkedPtxSize = dlsym(RTLD_DEFAULT, 'nvJitLinkGetLinkedPtxSize') - if __nvJitLinkGetLinkedPtxSize == NULL: - if handle == NULL: - handle = load_library(driver_ver) - __nvJitLinkGetLinkedPtxSize = dlsym(handle, 'nvJitLinkGetLinkedPtxSize') - - global __nvJitLinkGetLinkedPtx - __nvJitLinkGetLinkedPtx = dlsym(RTLD_DEFAULT, 'nvJitLinkGetLinkedPtx') - if __nvJitLinkGetLinkedPtx == NULL: - if handle == NULL: - handle = load_library(driver_ver) - __nvJitLinkGetLinkedPtx = dlsym(handle, 'nvJitLinkGetLinkedPtx') - - global __nvJitLinkGetErrorLogSize - __nvJitLinkGetErrorLogSize = dlsym(RTLD_DEFAULT, 'nvJitLinkGetErrorLogSize') - if __nvJitLinkGetErrorLogSize == NULL: - if handle == NULL: - handle = load_library(driver_ver) - __nvJitLinkGetErrorLogSize = dlsym(handle, 'nvJitLinkGetErrorLogSize') - - global __nvJitLinkGetErrorLog - 
__nvJitLinkGetErrorLog = dlsym(RTLD_DEFAULT, 'nvJitLinkGetErrorLog') - if __nvJitLinkGetErrorLog == NULL: - if handle == NULL: - handle = load_library(driver_ver) - __nvJitLinkGetErrorLog = dlsym(handle, 'nvJitLinkGetErrorLog') - - global __nvJitLinkGetInfoLogSize - __nvJitLinkGetInfoLogSize = dlsym(RTLD_DEFAULT, 'nvJitLinkGetInfoLogSize') - if __nvJitLinkGetInfoLogSize == NULL: - if handle == NULL: - handle = load_library(driver_ver) - __nvJitLinkGetInfoLogSize = dlsym(handle, 'nvJitLinkGetInfoLogSize') - - global __nvJitLinkGetInfoLog - __nvJitLinkGetInfoLog = dlsym(RTLD_DEFAULT, 'nvJitLinkGetInfoLog') - if __nvJitLinkGetInfoLog == NULL: - if handle == NULL: - handle = load_library(driver_ver) - __nvJitLinkGetInfoLog = dlsym(handle, 'nvJitLinkGetInfoLog') - - __py_nvJitLink_init = True - return 0 - - -cdef dict func_ptrs = None - - -cpdef dict _inspect_function_pointers(): - global func_ptrs - if func_ptrs is not None: - return func_ptrs - - _check_or_init_nvJitLink() - cdef dict data = {} - - global __nvJitLinkCreate - data["__nvJitLinkCreate"] = __nvJitLinkCreate - - global __nvJitLinkDestroy - data["__nvJitLinkDestroy"] = __nvJitLinkDestroy - - global __nvJitLinkAddData - data["__nvJitLinkAddData"] = __nvJitLinkAddData - - global __nvJitLinkAddFile - data["__nvJitLinkAddFile"] = __nvJitLinkAddFile - - global __nvJitLinkComplete - data["__nvJitLinkComplete"] = __nvJitLinkComplete - - global __nvJitLinkGetLinkedCubinSize - data["__nvJitLinkGetLinkedCubinSize"] = __nvJitLinkGetLinkedCubinSize - - global __nvJitLinkGetLinkedCubin - data["__nvJitLinkGetLinkedCubin"] = __nvJitLinkGetLinkedCubin - - global __nvJitLinkGetLinkedPtxSize - data["__nvJitLinkGetLinkedPtxSize"] = __nvJitLinkGetLinkedPtxSize - - global __nvJitLinkGetLinkedPtx - data["__nvJitLinkGetLinkedPtx"] = __nvJitLinkGetLinkedPtx - - global __nvJitLinkGetErrorLogSize - data["__nvJitLinkGetErrorLogSize"] = __nvJitLinkGetErrorLogSize - - global __nvJitLinkGetErrorLog - data["__nvJitLinkGetErrorLog"] = 
__nvJitLinkGetErrorLog - - global __nvJitLinkGetInfoLogSize - data["__nvJitLinkGetInfoLogSize"] = __nvJitLinkGetInfoLogSize - - global __nvJitLinkGetInfoLog - data["__nvJitLinkGetInfoLog"] = __nvJitLinkGetInfoLog - - func_ptrs = data - return data - - -cpdef _inspect_function_pointer(str name): - global func_ptrs - if func_ptrs is None: - func_ptrs = _inspect_function_pointers() - return func_ptrs[name] - - -############################################################################### -# Wrapper functions -############################################################################### - -cdef nvJitLinkResult _nvJitLinkCreate(nvJitLinkHandle* handle, uint32_t numOptions, const char** options) except* nogil: - global __nvJitLinkCreate - _check_or_init_nvJitLink() - if __nvJitLinkCreate == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkCreate is not found") - return (__nvJitLinkCreate)( - handle, numOptions, options) - - -cdef nvJitLinkResult _nvJitLinkDestroy(nvJitLinkHandle* handle) except* nogil: - global __nvJitLinkDestroy - _check_or_init_nvJitLink() - if __nvJitLinkDestroy == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkDestroy is not found") - return (__nvJitLinkDestroy)( - handle) - - -cdef nvJitLinkResult _nvJitLinkAddData(nvJitLinkHandle handle, nvJitLinkInputType inputType, const void* data, size_t size, const char* name) except* nogil: - global __nvJitLinkAddData - _check_or_init_nvJitLink() - if __nvJitLinkAddData == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkAddData is not found") - return (__nvJitLinkAddData)( - handle, inputType, data, size, name) - - -cdef nvJitLinkResult _nvJitLinkAddFile(nvJitLinkHandle handle, nvJitLinkInputType inputType, const char* fileName) except* nogil: - global __nvJitLinkAddFile - _check_or_init_nvJitLink() - if __nvJitLinkAddFile == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkAddFile is not found") - return (__nvJitLinkAddFile)( - 
handle, inputType, fileName) - - -cdef nvJitLinkResult _nvJitLinkComplete(nvJitLinkHandle handle) except* nogil: - global __nvJitLinkComplete - _check_or_init_nvJitLink() - if __nvJitLinkComplete == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkComplete is not found") - return (__nvJitLinkComplete)( - handle) - - -cdef nvJitLinkResult _nvJitLinkGetLinkedCubinSize(nvJitLinkHandle handle, size_t* size) except* nogil: - global __nvJitLinkGetLinkedCubinSize - _check_or_init_nvJitLink() - if __nvJitLinkGetLinkedCubinSize == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkGetLinkedCubinSize is not found") - return (__nvJitLinkGetLinkedCubinSize)( - handle, size) - - -cdef nvJitLinkResult _nvJitLinkGetLinkedCubin(nvJitLinkHandle handle, void* cubin) except* nogil: - global __nvJitLinkGetLinkedCubin - _check_or_init_nvJitLink() - if __nvJitLinkGetLinkedCubin == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkGetLinkedCubin is not found") - return (__nvJitLinkGetLinkedCubin)( - handle, cubin) - - -cdef nvJitLinkResult _nvJitLinkGetLinkedPtxSize(nvJitLinkHandle handle, size_t* size) except* nogil: - global __nvJitLinkGetLinkedPtxSize - _check_or_init_nvJitLink() - if __nvJitLinkGetLinkedPtxSize == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkGetLinkedPtxSize is not found") - return (__nvJitLinkGetLinkedPtxSize)( - handle, size) - - -cdef nvJitLinkResult _nvJitLinkGetLinkedPtx(nvJitLinkHandle handle, char* ptx) except* nogil: - global __nvJitLinkGetLinkedPtx - _check_or_init_nvJitLink() - if __nvJitLinkGetLinkedPtx == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkGetLinkedPtx is not found") - return (__nvJitLinkGetLinkedPtx)( - handle, ptx) - - -cdef nvJitLinkResult _nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t* size) except* nogil: - global __nvJitLinkGetErrorLogSize - _check_or_init_nvJitLink() - if __nvJitLinkGetErrorLogSize == NULL: - with gil: - raise 
FunctionNotFoundError("function nvJitLinkGetErrorLogSize is not found") - return (__nvJitLinkGetErrorLogSize)( - handle, size) - - -cdef nvJitLinkResult _nvJitLinkGetErrorLog(nvJitLinkHandle handle, char* log) except* nogil: - global __nvJitLinkGetErrorLog - _check_or_init_nvJitLink() - if __nvJitLinkGetErrorLog == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkGetErrorLog is not found") - return (__nvJitLinkGetErrorLog)( - handle, log) - - -cdef nvJitLinkResult _nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t* size) except* nogil: - global __nvJitLinkGetInfoLogSize - _check_or_init_nvJitLink() - if __nvJitLinkGetInfoLogSize == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkGetInfoLogSize is not found") - return (__nvJitLinkGetInfoLogSize)( - handle, size) - - -cdef nvJitLinkResult _nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) except* nogil: - global __nvJitLinkGetInfoLog - _check_or_init_nvJitLink() - if __nvJitLinkGetInfoLog == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkGetInfoLog is not found") - return (__nvJitLinkGetInfoLog)( - handle, log) diff --git a/cuda_bindings/cuda/bindings/_bindings/nvJitLink_windows.pyx b/cuda_bindings/cuda/bindings/_bindings/nvJitLink_windows.pyx deleted file mode 100644 index 8856b59ca..000000000 --- a/cuda_bindings/cuda/bindings/_bindings/nvJitLink_windows.pyx +++ /dev/null @@ -1,393 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. -# -# SPDX-License-Identifier: Apache-2.0 -# -# This code was automatically generated across versions from 12.0.1 to 12.4.1. Do not modify it directly. 
- -from libc.stdint cimport intptr_t - -from .utils cimport get_nvJitLink_dso_version_suffix - -import os -import site - -import win32api - -from .utils import FunctionNotFoundError, NotSupportedError - - -############################################################################### -# Wrapper init -############################################################################### - -LOAD_LIBRARY_SEARCH_SYSTEM32 = 0x00000800 -LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000 -LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100 -cdef bint __py_nvJitLink_init = False -cdef void* __cuDriverGetVersion = NULL - -cdef void* __nvJitLinkCreate = NULL -cdef void* __nvJitLinkDestroy = NULL -cdef void* __nvJitLinkAddData = NULL -cdef void* __nvJitLinkAddFile = NULL -cdef void* __nvJitLinkComplete = NULL -cdef void* __nvJitLinkGetLinkedCubinSize = NULL -cdef void* __nvJitLinkGetLinkedCubin = NULL -cdef void* __nvJitLinkGetLinkedPtxSize = NULL -cdef void* __nvJitLinkGetLinkedPtx = NULL -cdef void* __nvJitLinkGetErrorLogSize = NULL -cdef void* __nvJitLinkGetErrorLog = NULL -cdef void* __nvJitLinkGetInfoLogSize = NULL -cdef void* __nvJitLinkGetInfoLog = NULL - - -cdef inline list get_site_packages(): - return [site.getusersitepackages()] + site.getsitepackages() - - -cdef load_library(const int driver_ver): - handle = 0 - - for suffix in get_nvJitLink_dso_version_suffix(driver_ver): - if len(suffix) == 0: - continue - dll_name = f"nvJitLink64_{suffix}.dll" - - # First check if the DLL has been loaded by 3rd parties - try: - handle = win32api.GetModuleHandle(dll_name) - except: - pass - else: - break - - # Next, check if DLLs are installed via pip - for sp in get_site_packages(): - mod_path = os.path.join(sp, "nvidia", "nvJitLink", "bin") - if not os.path.isdir(mod_path): - continue - os.add_dll_directory(mod_path) - try: - handle = win32api.LoadLibraryEx( - # Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path... 
- os.path.join(mod_path, dll_name), - 0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR) - except: - pass - else: - break - - # Finally, try default search - try: - handle = win32api.LoadLibrary(dll_name) - except: - pass - else: - break - else: - raise RuntimeError('Failed to load nvJitLink') - - assert handle != 0 - return handle - - -cdef int _check_or_init_nvJitLink() except -1 nogil: - global __py_nvJitLink_init - if __py_nvJitLink_init: - return 0 - - cdef int err, driver_ver - with gil: - # Load driver to check version - try: - handle = win32api.LoadLibraryEx("nvcuda.dll", 0, LOAD_LIBRARY_SEARCH_SYSTEM32) - except Exception as e: - raise NotSupportedError(f'CUDA driver is not found ({e})') - global __cuDriverGetVersion - if __cuDriverGetVersion == NULL: - __cuDriverGetVersion = win32api.GetProcAddress(handle, 'cuDriverGetVersion') - if __cuDriverGetVersion == NULL: - raise RuntimeError('something went wrong') - err = (__cuDriverGetVersion)(&driver_ver) - if err != 0: - raise RuntimeError('something went wrong') - - # Load library - handle = load_library(driver_ver) - - # Load function - global __nvJitLinkCreate - try: - __nvJitLinkCreate = win32api.GetProcAddress(handle, 'nvJitLinkCreate') - except: - pass - - global __nvJitLinkDestroy - try: - __nvJitLinkDestroy = win32api.GetProcAddress(handle, 'nvJitLinkDestroy') - except: - pass - - global __nvJitLinkAddData - try: - __nvJitLinkAddData = win32api.GetProcAddress(handle, 'nvJitLinkAddData') - except: - pass - - global __nvJitLinkAddFile - try: - __nvJitLinkAddFile = win32api.GetProcAddress(handle, 'nvJitLinkAddFile') - except: - pass - - global __nvJitLinkComplete - try: - __nvJitLinkComplete = win32api.GetProcAddress(handle, 'nvJitLinkComplete') - except: - pass - - global __nvJitLinkGetLinkedCubinSize - try: - __nvJitLinkGetLinkedCubinSize = win32api.GetProcAddress(handle, 'nvJitLinkGetLinkedCubinSize') - except: - pass - - global __nvJitLinkGetLinkedCubin - try: - 
__nvJitLinkGetLinkedCubin = win32api.GetProcAddress(handle, 'nvJitLinkGetLinkedCubin') - except: - pass - - global __nvJitLinkGetLinkedPtxSize - try: - __nvJitLinkGetLinkedPtxSize = win32api.GetProcAddress(handle, 'nvJitLinkGetLinkedPtxSize') - except: - pass - - global __nvJitLinkGetLinkedPtx - try: - __nvJitLinkGetLinkedPtx = win32api.GetProcAddress(handle, 'nvJitLinkGetLinkedPtx') - except: - pass - - global __nvJitLinkGetErrorLogSize - try: - __nvJitLinkGetErrorLogSize = win32api.GetProcAddress(handle, 'nvJitLinkGetErrorLogSize') - except: - pass - - global __nvJitLinkGetErrorLog - try: - __nvJitLinkGetErrorLog = win32api.GetProcAddress(handle, 'nvJitLinkGetErrorLog') - except: - pass - - global __nvJitLinkGetInfoLogSize - try: - __nvJitLinkGetInfoLogSize = win32api.GetProcAddress(handle, 'nvJitLinkGetInfoLogSize') - except: - pass - - global __nvJitLinkGetInfoLog - try: - __nvJitLinkGetInfoLog = win32api.GetProcAddress(handle, 'nvJitLinkGetInfoLog') - except: - pass - - __py_nvJitLink_init = True - return 0 - - -cdef dict func_ptrs = None - - -cpdef dict _inspect_function_pointers(): - global func_ptrs - if func_ptrs is not None: - return func_ptrs - - _check_or_init_nvJitLink() - cdef dict data = {} - - global __nvJitLinkCreate - data["__nvJitLinkCreate"] = __nvJitLinkCreate - - global __nvJitLinkDestroy - data["__nvJitLinkDestroy"] = __nvJitLinkDestroy - - global __nvJitLinkAddData - data["__nvJitLinkAddData"] = __nvJitLinkAddData - - global __nvJitLinkAddFile - data["__nvJitLinkAddFile"] = __nvJitLinkAddFile - - global __nvJitLinkComplete - data["__nvJitLinkComplete"] = __nvJitLinkComplete - - global __nvJitLinkGetLinkedCubinSize - data["__nvJitLinkGetLinkedCubinSize"] = __nvJitLinkGetLinkedCubinSize - - global __nvJitLinkGetLinkedCubin - data["__nvJitLinkGetLinkedCubin"] = __nvJitLinkGetLinkedCubin - - global __nvJitLinkGetLinkedPtxSize - data["__nvJitLinkGetLinkedPtxSize"] = __nvJitLinkGetLinkedPtxSize - - global __nvJitLinkGetLinkedPtx - 
data["__nvJitLinkGetLinkedPtx"] = __nvJitLinkGetLinkedPtx - - global __nvJitLinkGetErrorLogSize - data["__nvJitLinkGetErrorLogSize"] = __nvJitLinkGetErrorLogSize - - global __nvJitLinkGetErrorLog - data["__nvJitLinkGetErrorLog"] = __nvJitLinkGetErrorLog - - global __nvJitLinkGetInfoLogSize - data["__nvJitLinkGetInfoLogSize"] = __nvJitLinkGetInfoLogSize - - global __nvJitLinkGetInfoLog - data["__nvJitLinkGetInfoLog"] = __nvJitLinkGetInfoLog - - func_ptrs = data - return data - - -cpdef _inspect_function_pointer(str name): - global func_ptrs - if func_ptrs is None: - func_ptrs = _inspect_function_pointers() - return func_ptrs[name] - - -############################################################################### -# Wrapper functions -############################################################################### - -cdef nvJitLinkResult _nvJitLinkCreate(nvJitLinkHandle* handle, uint32_t numOptions, const char** options) except* nogil: - global __nvJitLinkCreate - _check_or_init_nvJitLink() - if __nvJitLinkCreate == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkCreate is not found") - return (__nvJitLinkCreate)( - handle, numOptions, options) - - -cdef nvJitLinkResult _nvJitLinkDestroy(nvJitLinkHandle* handle) except* nogil: - global __nvJitLinkDestroy - _check_or_init_nvJitLink() - if __nvJitLinkDestroy == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkDestroy is not found") - return (__nvJitLinkDestroy)( - handle) - - -cdef nvJitLinkResult _nvJitLinkAddData(nvJitLinkHandle handle, nvJitLinkInputType inputType, const void* data, size_t size, const char* name) except* nogil: - global __nvJitLinkAddData - _check_or_init_nvJitLink() - if __nvJitLinkAddData == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkAddData is not found") - return (__nvJitLinkAddData)( - handle, inputType, data, size, name) - - -cdef nvJitLinkResult _nvJitLinkAddFile(nvJitLinkHandle handle, nvJitLinkInputType inputType, const char* 
fileName) except* nogil: - global __nvJitLinkAddFile - _check_or_init_nvJitLink() - if __nvJitLinkAddFile == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkAddFile is not found") - return (__nvJitLinkAddFile)( - handle, inputType, fileName) - - -cdef nvJitLinkResult _nvJitLinkComplete(nvJitLinkHandle handle) except* nogil: - global __nvJitLinkComplete - _check_or_init_nvJitLink() - if __nvJitLinkComplete == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkComplete is not found") - return (__nvJitLinkComplete)( - handle) - - -cdef nvJitLinkResult _nvJitLinkGetLinkedCubinSize(nvJitLinkHandle handle, size_t* size) except* nogil: - global __nvJitLinkGetLinkedCubinSize - _check_or_init_nvJitLink() - if __nvJitLinkGetLinkedCubinSize == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkGetLinkedCubinSize is not found") - return (__nvJitLinkGetLinkedCubinSize)( - handle, size) - - -cdef nvJitLinkResult _nvJitLinkGetLinkedCubin(nvJitLinkHandle handle, void* cubin) except* nogil: - global __nvJitLinkGetLinkedCubin - _check_or_init_nvJitLink() - if __nvJitLinkGetLinkedCubin == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkGetLinkedCubin is not found") - return (__nvJitLinkGetLinkedCubin)( - handle, cubin) - - -cdef nvJitLinkResult _nvJitLinkGetLinkedPtxSize(nvJitLinkHandle handle, size_t* size) except* nogil: - global __nvJitLinkGetLinkedPtxSize - _check_or_init_nvJitLink() - if __nvJitLinkGetLinkedPtxSize == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkGetLinkedPtxSize is not found") - return (__nvJitLinkGetLinkedPtxSize)( - handle, size) - - -cdef nvJitLinkResult _nvJitLinkGetLinkedPtx(nvJitLinkHandle handle, char* ptx) except* nogil: - global __nvJitLinkGetLinkedPtx - _check_or_init_nvJitLink() - if __nvJitLinkGetLinkedPtx == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkGetLinkedPtx is not found") - return (__nvJitLinkGetLinkedPtx)( - handle, ptx) - - 
-cdef nvJitLinkResult _nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t* size) except* nogil: - global __nvJitLinkGetErrorLogSize - _check_or_init_nvJitLink() - if __nvJitLinkGetErrorLogSize == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkGetErrorLogSize is not found") - return (__nvJitLinkGetErrorLogSize)( - handle, size) - - -cdef nvJitLinkResult _nvJitLinkGetErrorLog(nvJitLinkHandle handle, char* log) except* nogil: - global __nvJitLinkGetErrorLog - _check_or_init_nvJitLink() - if __nvJitLinkGetErrorLog == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkGetErrorLog is not found") - return (__nvJitLinkGetErrorLog)( - handle, log) - - -cdef nvJitLinkResult _nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t* size) except* nogil: - global __nvJitLinkGetInfoLogSize - _check_or_init_nvJitLink() - if __nvJitLinkGetInfoLogSize == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkGetInfoLogSize is not found") - return (__nvJitLinkGetInfoLogSize)( - handle, size) - - -cdef nvJitLinkResult _nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) except* nogil: - global __nvJitLinkGetInfoLog - _check_or_init_nvJitLink() - if __nvJitLinkGetInfoLog == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkGetInfoLog is not found") - return (__nvJitLinkGetInfoLog)( - handle, log) diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx index 5cac180f3..19ea34ee1 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx @@ -6,14 +6,22 @@ from libc.stdint cimport intptr_t +<<<<<<< HEAD from utils cimport get_nvjitlink_dso_version_suffix +======= +from .utils cimport get_nvjitlink_dso_version_suffix +>>>>>>> 5d60eb1 (more changes) import os import site import win32api +<<<<<<< HEAD from utils import FunctionNotFoundError, NotSupportedError 
+======= +from .utils import FunctionNotFoundError, NotSupportedError +>>>>>>> 5d60eb1 (more changes) ############################################################################### diff --git a/cuda_bindings/cuda/bindings/cynvjitlink.pxd b/cuda_bindings/cuda/bindings/cynvjitlink.pxd index 3dcc1d4ec..250153ece 100644 --- a/cuda_bindings/cuda/bindings/cynvjitlink.pxd +++ b/cuda_bindings/cuda/bindings/cynvjitlink.pxd @@ -5,7 +5,11 @@ # This code was automatically generated across versions from 12.0.76 to 12.6.77. Do not modify it directly. +<<<<<<< HEAD from libc.stdint cimport uint32_t +======= +from libc.stdint cimport intptr_t, uint32_t +>>>>>>> 5d60eb1 (more changes) ############################################################################### diff --git a/cuda_bindings/cuda/bindings/cynvjitlink.pyx b/cuda_bindings/cuda/bindings/cynvjitlink.pyx index 5e882524e..d4acbd606 100644 --- a/cuda_bindings/cuda/bindings/cynvjitlink.pyx +++ b/cuda_bindings/cuda/bindings/cynvjitlink.pyx @@ -5,7 +5,10 @@ # This code was automatically generated across versions from 12.0.76 to 12.6.77. Do not modify it directly. from ._internal cimport nvjitlink as _nvjitlink +<<<<<<< HEAD from libc.stdint cimport uint32_t +======= +>>>>>>> 5d60eb1 (more changes) ############################################################################### diff --git a/cuda_bindings/cynvJitLink.pxd b/cuda_bindings/cynvJitLink.pxd deleted file mode 100644 index ed440c0b3..000000000 --- a/cuda_bindings/cynvJitLink.pxd +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. -# -# SPDX-License-Identifier: Apache-2.0 -# -# This code was automatically generated across versions from 12.0.1 to 12.4.1. Do not modify it directly. - - -from libc.stdint cimport int64_t - - -############################################################################### -# Types (structs, enums, ...) 
-############################################################################### - -# enums - - - -# types -cdef extern from *: - """ - #include - #include - #include - """ - ctypedef void* cudaStream_t 'cudaStream_t' - - - - - -############################################################################### -# Functions -############################################################################### - -cdef nvJitLinkResult nvJitLinkCreate(nvJitLinkHandle* handle, uint32_t numOptions, const char** options) except* nogil -cdef nvJitLinkResult nvJitLinkDestroy(nvJitLinkHandle* handle) except* nogil -cdef nvJitLinkResult nvJitLinkAddData(nvJitLinkHandle handle, nvJitLinkInputType inputType, const void* data, size_t size, const char* name) except* nogil -cdef nvJitLinkResult nvJitLinkAddFile(nvJitLinkHandle handle, nvJitLinkInputType inputType, const char* fileName) except* nogil -cdef nvJitLinkResult nvJitLinkComplete(nvJitLinkHandle handle) except* nogil -cdef nvJitLinkResult nvJitLinkGetLinkedCubinSize(nvJitLinkHandle handle, size_t* size) except* nogil -cdef nvJitLinkResult nvJitLinkGetLinkedCubin(nvJitLinkHandle handle, void* cubin) except* nogil -cdef nvJitLinkResult nvJitLinkGetLinkedPtxSize(nvJitLinkHandle handle, size_t* size) except* nogil -cdef nvJitLinkResult nvJitLinkGetLinkedPtx(nvJitLinkHandle handle, char* ptx) except* nogil -cdef nvJitLinkResult nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t* size) except* nogil -cdef nvJitLinkResult nvJitLinkGetErrorLog(nvJitLinkHandle handle, char* log) except* nogil -cdef nvJitLinkResult nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t* size) except* nogil -cdef nvJitLinkResult nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) except* nogil diff --git a/cuda_bindings/cynvJitLink.pyx b/cuda_bindings/cynvJitLink.pyx deleted file mode 100644 index 65d3f9840..000000000 --- a/cuda_bindings/cynvJitLink.pyx +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. 
ALL RIGHTS RESERVED. -# -# SPDX-License-Identifier: Apache-2.0 -# -# This code was automatically generated across versions from 12.0.1 to 12.4.1. Do not modify it directly. - -from ._internal cimport nvJitLink as _nvJitLink - - -############################################################################### -# Wrapper functions -############################################################################### - -cdef nvJitLinkResult nvJitLinkCreate(nvJitLinkHandle* handle, uint32_t numOptions, const char** options) except* nogil: - return _nvJitLink._nvJitLinkCreate(handle, numOptions, options) - - -cdef nvJitLinkResult nvJitLinkDestroy(nvJitLinkHandle* handle) except* nogil: - return _nvJitLink._nvJitLinkDestroy(handle) - - -cdef nvJitLinkResult nvJitLinkAddData(nvJitLinkHandle handle, nvJitLinkInputType inputType, const void* data, size_t size, const char* name) except* nogil: - return _nvJitLink._nvJitLinkAddData(handle, inputType, data, size, name) - - -cdef nvJitLinkResult nvJitLinkAddFile(nvJitLinkHandle handle, nvJitLinkInputType inputType, const char* fileName) except* nogil: - return _nvJitLink._nvJitLinkAddFile(handle, inputType, fileName) - - -cdef nvJitLinkResult nvJitLinkComplete(nvJitLinkHandle handle) except* nogil: - return _nvJitLink._nvJitLinkComplete(handle) - - -cdef nvJitLinkResult nvJitLinkGetLinkedCubinSize(nvJitLinkHandle handle, size_t* size) except* nogil: - return _nvJitLink._nvJitLinkGetLinkedCubinSize(handle, size) - - -cdef nvJitLinkResult nvJitLinkGetLinkedCubin(nvJitLinkHandle handle, void* cubin) except* nogil: - return _nvJitLink._nvJitLinkGetLinkedCubin(handle, cubin) - - -cdef nvJitLinkResult nvJitLinkGetLinkedPtxSize(nvJitLinkHandle handle, size_t* size) except* nogil: - return _nvJitLink._nvJitLinkGetLinkedPtxSize(handle, size) - - -cdef nvJitLinkResult nvJitLinkGetLinkedPtx(nvJitLinkHandle handle, char* ptx) except* nogil: - return _nvJitLink._nvJitLinkGetLinkedPtx(handle, ptx) - - -cdef nvJitLinkResult 
nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t* size) except* nogil: - return _nvJitLink._nvJitLinkGetErrorLogSize(handle, size) - - -cdef nvJitLinkResult nvJitLinkGetErrorLog(nvJitLinkHandle handle, char* log) except* nogil: - return _nvJitLink._nvJitLinkGetErrorLog(handle, log) - - -cdef nvJitLinkResult nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t* size) except* nogil: - return _nvJitLink._nvJitLinkGetInfoLogSize(handle, size) - - -cdef nvJitLinkResult nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) except* nogil: - return _nvJitLink._nvJitLinkGetInfoLog(handle, log) diff --git a/cuda_bindings/nvJitLink.pxd b/cuda_bindings/nvJitLink.pxd deleted file mode 100644 index d063002be..000000000 --- a/cuda_bindings/nvJitLink.pxd +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. -# -# SPDX-License-Identifier: Apache-2.0 -# -# This code was automatically generated across versions from 12.0.1 to 12.4.1. Do not modify it directly. 
- -from libc.stdint cimport intptr_t - -from .cynvJitLink cimport * - - -############################################################################### -# Types -############################################################################### - - - -ctypedef cudaStream_t Stream -ctypedef cudaDataType DataType -ctypedef libraryPropertyType_t LibraryPropertyType - - -############################################################################### -# Enum -############################################################################### - - - - -############################################################################### -# Functions -############################################################################### - -cpdef create(intptr_t handle, uint32_t num_options, intptr_t options) -cpdef destroy(intptr_t handle) -cpdef add_data(nvJitLinkHandle handle, nvJitLinkInputType input_type, intptr_t data, size_t size, intptr_t name) -cpdef add_file(nvJitLinkHandle handle, nvJitLinkInputType input_type, intptr_t file_name) -cpdef complete(nvJitLinkHandle handle) -cpdef get_linked_cubin_size(nvJitLinkHandle handle, intptr_t size) -cpdef get_linked_cubin(nvJitLinkHandle handle, intptr_t cubin) -cpdef get_linked_ptx_size(nvJitLinkHandle handle, intptr_t size) -cpdef get_linked_ptx(nvJitLinkHandle handle, intptr_t ptx) -cpdef get_error_log_size(nvJitLinkHandle handle, intptr_t size) -cpdef get_error_log(nvJitLinkHandle handle, intptr_t log) -cpdef get_info_log_size(nvJitLinkHandle handle, intptr_t size) -cpdef get_info_log(nvJitLinkHandle handle, intptr_t log) diff --git a/cuda_bindings/nvJitLink.pyx b/cuda_bindings/nvJitLink.pyx deleted file mode 100644 index 18f4c7545..000000000 --- a/cuda_bindings/nvJitLink.pyx +++ /dev/null @@ -1,138 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. -# -# SPDX-License-Identifier: Apache-2.0 -# -# This code was automatically generated across versions from 12.0.1 to 12.4.1. Do not modify it directly. 
- -cimport cython # NOQA - -from enum import IntEnum as _IntEnum - - -############################################################################### -# Enum -############################################################################### - - - - -############################################################################### -# Error handling -############################################################################### - -cdef dict STATUS={ - NVJITLINK_SUCCESS : 'NVJITLINK_SUCCESS', - NVJITLINK_ERROR_UNRECOGNIZED_OPTION : 'NVJITLINK_ERROR_UNRECOGNIZED_OPTION', - NVJITLINK_ERROR_MISSING_ARCH : 'NVJITLINK_ERROR_MISSING_ARCH', // -arch=sm_NN option not specified - NVJITLINK_ERROR_INVALID_INPUT : 'NVJITLINK_ERROR_INVALID_INPUT', - NVJITLINK_ERROR_PTX_COMPILE : 'NVJITLINK_ERROR_PTX_COMPILE', - NVJITLINK_ERROR_NVVM_COMPILE : 'NVJITLINK_ERROR_NVVM_COMPILE', - NVJITLINK_ERROR_INTERNAL : 'NVJITLINK_ERROR_INTERNAL', - NVJITLINK_ERROR_THREADPOOL : 'NVJITLINK_ERROR_THREADPOOL', - NVJITLINK_ERROR_UNRECOGNIZED_INPUT : 'NVJITLINK_ERROR_UNRECOGNIZED_INPUT', - NVJITLINK_ERROR_NULL_INPUT : 'NVJITLINK_ERROR_NULL_INPUT', - NVJITLINK_ERROR_INCOMPATIBLE_OPTIONS: 'NVJITLINK_ERROR_INCOMPATIBLE_OPTIONS', - NVJITLINK_ERROR_INCORRECT_INPUT_TYPE: 'NVJITLINK_ERROR_INCORRECT_INPUT_TYPE', - NVJITLINK_ERROR_ARCH_MISMATCH : 'NVJITLINK_ERROR_ARCH_MISMATCH', - NVJITLINK_ERROR_OUTDATED_LIBRARY : 'NVJITLINK_ERROR_OUTDATED_LIBRARY', - NVJITLINK_ERROR_MISSING_FATBIN : 'NVJITLINK_ERROR_MISSING_FATBIN' -} - -class nvJitLinkError(Exception): - - def __init__(self, status): - self.status = status - cdef str err = STATUS[status] - super(nvJitLinkError, self).__init__(err) - - def __reduce__(self): - return (type(self), (self.status,)) - - -@cython.profile(False) -cdef inline void check_status(int status) nogil: - if status != 0: - with gil: - raise nvJitLinkError(status) - - -############################################################################### -# Wrapper functions 
-############################################################################### - -cpdef create(intptr_t handle, uint32_t num_options, intptr_t options): - with nogil: - status = nvJitLinkCreate(handle, num_options, options) - _check_status(status) - - -cpdef destroy(intptr_t handle): - with nogil: - status = nvJitLinkDestroy(handle) - _check_status(status) - - -cpdef add_data(nvJitLinkHandle handle, nvJitLinkInputType input_type, intptr_t data, size_t size, intptr_t name): - with nogil: - status = nvJitLinkAddData(handle, input_type, data, size, name) - _check_status(status) - - -cpdef add_file(nvJitLinkHandle handle, nvJitLinkInputType input_type, intptr_t file_name): - with nogil: - status = nvJitLinkAddFile(handle, input_type, file_name) - _check_status(status) - - -cpdef complete(nvJitLinkHandle handle): - with nogil: - status = nvJitLinkComplete(handle) - _check_status(status) - - -cpdef get_linked_cubin_size(nvJitLinkHandle handle, intptr_t size): - with nogil: - status = nvJitLinkGetLinkedCubinSize(handle, size) - _check_status(status) - - -cpdef get_linked_cubin(nvJitLinkHandle handle, intptr_t cubin): - with nogil: - status = nvJitLinkGetLinkedCubin(handle, cubin) - _check_status(status) - - -cpdef get_linked_ptx_size(nvJitLinkHandle handle, intptr_t size): - with nogil: - status = nvJitLinkGetLinkedPtxSize(handle, size) - _check_status(status) - - -cpdef get_linked_ptx(nvJitLinkHandle handle, intptr_t ptx): - with nogil: - status = nvJitLinkGetLinkedPtx(handle, ptx) - _check_status(status) - - -cpdef get_error_log_size(nvJitLinkHandle handle, intptr_t size): - with nogil: - status = nvJitLinkGetErrorLogSize(handle, size) - _check_status(status) - - -cpdef get_error_log(nvJitLinkHandle handle, intptr_t log): - with nogil: - status = nvJitLinkGetErrorLog(handle, log) - _check_status(status) - - -cpdef get_info_log_size(nvJitLinkHandle handle, intptr_t size): - with nogil: - status = nvJitLinkGetInfoLogSize(handle, size) - _check_status(status) - - -cpdef 
get_info_log(nvJitLinkHandle handle, intptr_t log): - with nogil: - status = nvJitLinkGetInfoLog(handle, log) - _check_status(status) diff --git a/cuda_bindings/setup.py b/cuda_bindings/setup.py index 8ffb50d63..4bfc57f19 100644 --- a/cuda_bindings/setup.py +++ b/cuda_bindings/setup.py @@ -231,7 +231,6 @@ def rename_architecture_specific_files(path): @atexit.register def cleanup_dst_files(): - pass for dst in architechture_specific_files_dir: try: os.remove(dst) @@ -240,6 +239,7 @@ def cleanup_dst_files(): architechture_specific_files_dir = 'cuda/bindings/_internal/' + def do_cythonize(extensions): return cythonize( extensions, @@ -267,14 +267,19 @@ def do_cythonize(extensions): # interal files used by cybind. We on ['cuda/bindings/_internal/nvjitlink.pyx'], ['cuda/bindings/_internal/utils.pyx'], - ] +rename_architecture_specific_files() + for sources in sources_list: extensions += prep_extensions(sources) +# for sources in new_sources_list: +# new_extensions += prep_extensions(sources) + + # --------------------------------------------------------------------- # Custom build_ext command # Files are build in two steps: @@ -297,6 +302,11 @@ def finalize_options(self): # ---------------------------------------------------------------------- # Setup +package_data=dict.fromkeys( + find_packages(include=["cuda.cuda", "cuda.cuda.*", "cuda.cuda.bindings", "cuda.cuda.bindings._bindings", "cuda.cuda.bindings._lib", "cuda.cuda.bindings._lib.cyruntime", "cuda.cuda.bindings._internal", "tests"]), + ["*.pxd", "*.pyx", "*.py", "*.h", "*.cpp"], + ) + setup( version=versioneer.get_version(), ext_modules=do_cythonize(extensions), diff --git a/cuda_bindings/tests/test_nvJitLink.py b/cuda_bindings/tests/test_nvJitLink.py deleted file mode 100644 index f566ae7c6..000000000 --- a/cuda_bindings/tests/test_nvJitLink.py +++ /dev/null @@ -1,164 +0,0 @@ -import pytest -from cuda import nvJitLink - -def test_create_no_arch_error(): - # nvjitlink expects at least the architecture to be 
specified. - with pytest.raises(RuntimeError, match="NVJITLINK_ERROR_MISSING_ARCH error"): - nvJitLink.create() - - -def test_invalid_arch_error(): - # sm_XX is not a valid architecture - with pytest.raises(RuntimeError, match="NVJITLINK_ERROR_UNRECOGNIZED_OPTION error"): - nvJitLink.create("-arch=sm_XX") - - -def test_unrecognized_option_error(): - with pytest.raises(RuntimeError, match="NVJITLINK_ERROR_UNRECOGNIZED_OPTION error"): - nvJitLink.create("-fictitious_option") - - -def test_invalid_option_type_error(): - with pytest.raises(TypeError, match="Expecting only strings"): - nvJitLink.create("-arch", 53) - - -def test_create_and_destroy(): - handle = nvJitLink.create("-arch=sm_53") - assert handle != 0 - nvJitLink.destroy(handle) - - -def test_complete_empty(): - handle = nvJitLink.create("-arch=sm_75") - nvJitLink.complete(handle) - nvJitLink.destroy(handle) - - -@pytest.mark.parametrize( - "input_file,input_type", - [ - ("device_functions_cubin", nvJitLink.InputType.CUBIN), - ("device_functions_fatbin", InputType.FATBIN), - ("device_functions_ptx", InputType.PTX), - ("device_functions_object", InputType.OBJECT), - ("device_functions_archive", InputType.LIBRARY), - ], -) -def test_add_file(input_file, input_type, gpu_arch_flag, request): - filename, data = request.getfixturevalue(input_file) - - handle = nvJitLink.create(gpu_arch_flag) - nvJitLink.add_data(handle, input_type.value, data, filename) - nvJitLink.destroy(handle) - - -# We test the LTO input case separately as it requires the `-lto` flag. The -# OBJECT input type is used because the LTO-IR container is packaged in an ELF -# object when produced by NVCC. 
-def test_add_file_lto(device_functions_ltoir_object, gpu_arch_flag): - filename, data = device_functions_ltoir_object - - handle = nvJitLink.create(gpu_arch_flag, "-lto") - nvJitLink.add_data(handle, InputType.OBJECT.value, data, filename) - nvJitLink.destroy(handle) - - -def test_get_error_log(undefined_extern_cubin, gpu_arch_flag): - handle = nvJitLink.create(gpu_arch_flag) - filename, data = undefined_extern_cubin - input_type = InputType.CUBIN.value - nvJitLink.add_data(handle, input_type, data, filename) - with pytest.raises(RuntimeError): - nvJitLink.complete(handle) - error_log = nvJitLink.get_error_log(handle) - nvJitLink.destroy(handle) - assert ( - "Undefined reference to '_Z5undefff' " - "in 'undefined_extern.cubin'" in error_log - ) - - -def test_get_info_log(device_functions_cubin, gpu_arch_flag): - handle = nvJitLink.create(gpu_arch_flag) - filename, data = device_functions_cubin - input_type = InputType.CUBIN.value - nvJitLink.add_data(handle, input_type, data, filename) - nvJitLink.complete(handle) - info_log = nvJitLink.get_info_log(handle) - nvJitLink.destroy(handle) - # Info log is empty - assert "" == info_log - - -def test_get_linked_cubin(device_functions_cubin, gpu_arch_flag): - handle = nvJitLink.create(gpu_arch_flag) - filename, data = device_functions_cubin - input_type = InputType.CUBIN.value - nvJitLink.add_data(handle, input_type, data, filename) - nvJitLink.complete(handle) - cubin = nvJitLink.get_linked_cubin(handle) - nvJitLink.destroy(handle) - - # Just check we got something that looks like an ELF - assert cubin[:4] == b"\x7fELF" - - -def test_get_linked_cubin_link_not_complete_error( - device_functions_cubin, gpu_arch_flag -): - handle = nvJitLink.create(gpu_arch_flag) - filename, data = device_functions_cubin - input_type = InputType.CUBIN.value - nvJitLink.add_data(handle, input_type, data, filename) - with pytest.raises(RuntimeError, match="NVJITLINK_ERROR_INTERNAL error"): - nvJitLink.get_linked_cubin(handle) - 
nvJitLink.destroy(handle) - - -def test_get_linked_cubin_from_lto(device_functions_ltoir_object, gpu_arch_flag): - filename, data = device_functions_ltoir_object - # device_functions_ltoir_object is a host object containing a fatbin - # containing an LTOIR container, because that is what NVCC produces when - # LTO is requested. So we need to use the OBJECT input type, and the linker - # retrieves the LTO IR from it because we passed the -lto flag. - input_type = InputType.OBJECT.value - handle = nvJitLink.create(gpu_arch_flag, "-lto") - nvJitLink.add_data(handle, input_type, data, filename) - nvJitLink.complete(handle) - cubin = nvJitLink.get_linked_cubin(handle) - nvJitLink.destroy(handle) - - # Just check we got something that looks like an ELF - assert cubin[:4] == b"\x7fELF" - - -def test_get_linked_ptx_from_lto(device_functions_ltoir_object, gpu_arch_flag): - filename, data = device_functions_ltoir_object - # device_functions_ltoir_object is a host object containing a fatbin - # containing an LTOIR container, because that is what NVCC produces when - # LTO is requested. So we need to use the OBJECT input type, and the linker - # retrieves the LTO IR from it because we passed the -lto flag. 
- input_type = InputType.OBJECT.value - handle = nvJitLink.create(gpu_arch_flag, "-lto", "-ptx") - nvJitLink.add_data(handle, input_type, data, filename) - nvJitLink.complete(handle) - nvJitLink.get_linked_ptx(handle) - nvJitLink.destroy(handle) - - -def test_get_linked_ptx_link_not_complete_error( - device_functions_ltoir_object, gpu_arch_flag -): - handle = nvJitLink.create(gpu_arch_flag, "-lto", "-ptx") - filename, data = device_functions_ltoir_object - input_type = InputType.OBJECT.value - nvJitLink.add_data(handle, input_type, data, filename) - with pytest.raises(RuntimeError, match="NVJITLINK_ERROR_INTERNAL error"): - nvJitLink.get_linked_ptx(handle) - nvJitLink.destroy(handle) - - -def test_package_version(): - assert pynvjitlink.__version__ is not None - assert len(str(pynvjitlink.__version__)) > 0 \ No newline at end of file From 238736c881cffeadaad0a0e3552fdb1ced18c596 Mon Sep 17 00:00:00 2001 From: ksimpson Date: Wed, 16 Oct 2024 16:58:53 -0700 Subject: [PATCH 10/34] rebase merge --- .../cuda/bindings/_internal/nvjitlink_windows.pyx | 4 ++-- cuda_bindings/cuda/bindings/cynvjitlink.pxd | 4 ++++ cuda_bindings/cuda/bindings/cynvjitlink.pyx | 4 ++++ cuda_bindings/setup.py | 14 -------------- 4 files changed, 10 insertions(+), 16 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx index 43852441e..5cac180f3 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx @@ -6,14 +6,14 @@ from libc.stdint cimport intptr_t -from .utils cimport get_nvjitlink_dso_version_suffix +from utils cimport get_nvjitlink_dso_version_suffix import os import site import win32api -from .utils import FunctionNotFoundError, NotSupportedError +from utils import FunctionNotFoundError, NotSupportedError ############################################################################### diff --git 
a/cuda_bindings/cuda/bindings/cynvjitlink.pxd b/cuda_bindings/cuda/bindings/cynvjitlink.pxd index 250153ece..3f4134706 100644 --- a/cuda_bindings/cuda/bindings/cynvjitlink.pxd +++ b/cuda_bindings/cuda/bindings/cynvjitlink.pxd @@ -5,11 +5,15 @@ # This code was automatically generated across versions from 12.0.76 to 12.6.77. Do not modify it directly. +<<<<<<< HEAD <<<<<<< HEAD from libc.stdint cimport uint32_t ======= from libc.stdint cimport intptr_t, uint32_t >>>>>>> 5d60eb1 (more changes) +======= +from libc.stdint cimport uint32_t +>>>>>>> 8c4029f (working) ############################################################################### diff --git a/cuda_bindings/cuda/bindings/cynvjitlink.pyx b/cuda_bindings/cuda/bindings/cynvjitlink.pyx index d4acbd606..c91948f03 100644 --- a/cuda_bindings/cuda/bindings/cynvjitlink.pyx +++ b/cuda_bindings/cuda/bindings/cynvjitlink.pyx @@ -6,9 +6,13 @@ from ._internal cimport nvjitlink as _nvjitlink <<<<<<< HEAD +<<<<<<< HEAD from libc.stdint cimport uint32_t ======= >>>>>>> 5d60eb1 (more changes) +======= +from libc.stdint cimport uint32_t +>>>>>>> 8c4029f (working) ############################################################################### diff --git a/cuda_bindings/setup.py b/cuda_bindings/setup.py index 4bfc57f19..f0aaee771 100644 --- a/cuda_bindings/setup.py +++ b/cuda_bindings/setup.py @@ -239,7 +239,6 @@ def cleanup_dst_files(): architechture_specific_files_dir = 'cuda/bindings/_internal/' - def do_cythonize(extensions): return cythonize( extensions, @@ -269,17 +268,9 @@ def do_cythonize(extensions): ['cuda/bindings/_internal/utils.pyx'], ] - - -rename_architecture_specific_files() - for sources in sources_list: extensions += prep_extensions(sources) -# for sources in new_sources_list: -# new_extensions += prep_extensions(sources) - - # --------------------------------------------------------------------- # Custom build_ext command # Files are build in two steps: @@ -302,11 +293,6 @@ def finalize_options(self): # 
---------------------------------------------------------------------- # Setup -package_data=dict.fromkeys( - find_packages(include=["cuda.cuda", "cuda.cuda.*", "cuda.cuda.bindings", "cuda.cuda.bindings._bindings", "cuda.cuda.bindings._lib", "cuda.cuda.bindings._lib.cyruntime", "cuda.cuda.bindings._internal", "tests"]), - ["*.pxd", "*.pyx", "*.py", "*.h", "*.cpp"], - ) - setup( version=versioneer.get_version(), ext_modules=do_cythonize(extensions), From d4bd29c550ca35fcf5674ae0595ac3b44e6e767e Mon Sep 17 00:00:00 2001 From: ksimpson Date: Wed, 16 Oct 2024 17:10:00 -0700 Subject: [PATCH 11/34] cleaning up merge --- .../cuda/bindings/_bindings/nvJitLink.pxd | 26 -- .../bindings/_bindings/nvJitLink_linux.pyx | 382 ----------------- .../bindings/_bindings/nvJitLink_windows.pyx | 393 ------------------ .../cuda/bindings/_internal/nvjitlink.pyx | 382 ----------------- cuda_bindings/setup.py | 8 +- cuda_bindings/tests/test_nvJitLink.py | 164 -------- 6 files changed, 7 insertions(+), 1348 deletions(-) delete mode 100644 cuda_bindings/cuda/bindings/_bindings/nvJitLink.pxd delete mode 100644 cuda_bindings/cuda/bindings/_bindings/nvJitLink_linux.pyx delete mode 100644 cuda_bindings/cuda/bindings/_bindings/nvJitLink_windows.pyx delete mode 100644 cuda_bindings/cuda/bindings/_internal/nvjitlink.pyx delete mode 100644 cuda_bindings/tests/test_nvJitLink.py diff --git a/cuda_bindings/cuda/bindings/_bindings/nvJitLink.pxd b/cuda_bindings/cuda/bindings/_bindings/nvJitLink.pxd deleted file mode 100644 index dca128a0e..000000000 --- a/cuda_bindings/cuda/bindings/_bindings/nvJitLink.pxd +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. -# -# SPDX-License-Identifier: Apache-2.0 -# -# This code was automatically generated across versions from 12.0.1 to 12.4.1. Do not modify it directly. 
- -from ..cynvJitLink cimport * - - -############################################################################### -# Wrapper functions -############################################################################### - -cdef nvJitLinkResult _nvJitLinkCreate(nvJitLinkHandle* handle, uint32_t numOptions, const char** options) except* nogil -cdef nvJitLinkResult _nvJitLinkDestroy(nvJitLinkHandle* handle) except* nogil -cdef nvJitLinkResult _nvJitLinkAddData(nvJitLinkHandle handle, nvJitLinkInputType inputType, const void* data, size_t size, const char* name) except* nogil -cdef nvJitLinkResult _nvJitLinkAddFile(nvJitLinkHandle handle, nvJitLinkInputType inputType, const char* fileName) except* nogil -cdef nvJitLinkResult _nvJitLinkComplete(nvJitLinkHandle handle) except* nogil -cdef nvJitLinkResult _nvJitLinkGetLinkedCubinSize(nvJitLinkHandle handle, size_t* size) except* nogil -cdef nvJitLinkResult _nvJitLinkGetLinkedCubin(nvJitLinkHandle handle, void* cubin) except* nogil -cdef nvJitLinkResult _nvJitLinkGetLinkedPtxSize(nvJitLinkHandle handle, size_t* size) except* nogil -cdef nvJitLinkResult _nvJitLinkGetLinkedPtx(nvJitLinkHandle handle, char* ptx) except* nogil -cdef nvJitLinkResult _nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t* size) except* nogil -cdef nvJitLinkResult _nvJitLinkGetErrorLog(nvJitLinkHandle handle, char* log) except* nogil -cdef nvJitLinkResult _nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t* size) except* nogil -cdef nvJitLinkResult _nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) except* nogil diff --git a/cuda_bindings/cuda/bindings/_bindings/nvJitLink_linux.pyx b/cuda_bindings/cuda/bindings/_bindings/nvJitLink_linux.pyx deleted file mode 100644 index 2fc6ca625..000000000 --- a/cuda_bindings/cuda/bindings/_bindings/nvJitLink_linux.pyx +++ /dev/null @@ -1,382 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. 
-# -# SPDX-License-Identifier: Apache-2.0 -# -# This code was automatically generated across versions from 12.0.1 to 12.4.1. Do not modify it directly. - -from libc.stdint cimport intptr_t - -from .utils cimport get_nvJitLink_dso_version_suffix - -from .utils import FunctionNotFoundError, NotSupportedError - - -############################################################################### -# Extern -############################################################################### - -cdef extern from "" nogil: - void* dlopen(const char*, int) - char* dlerror() - void* dlsym(void*, const char*) - int dlclose(void*) - - enum: - RTLD_LAZY - RTLD_NOW - RTLD_GLOBAL - RTLD_LOCAL - - const void* RTLD_DEFAULT 'RTLD_DEFAULT' - - -############################################################################### -# Wrapper init -############################################################################### - -cdef bint __py_nvJitLink_init = False -cdef void* __cuDriverGetVersion = NULL - -cdef void* __nvJitLinkCreate = NULL -cdef void* __nvJitLinkDestroy = NULL -cdef void* __nvJitLinkAddData = NULL -cdef void* __nvJitLinkAddFile = NULL -cdef void* __nvJitLinkComplete = NULL -cdef void* __nvJitLinkGetLinkedCubinSize = NULL -cdef void* __nvJitLinkGetLinkedCubin = NULL -cdef void* __nvJitLinkGetLinkedPtxSize = NULL -cdef void* __nvJitLinkGetLinkedPtx = NULL -cdef void* __nvJitLinkGetErrorLogSize = NULL -cdef void* __nvJitLinkGetErrorLog = NULL -cdef void* __nvJitLinkGetInfoLogSize = NULL -cdef void* __nvJitLinkGetInfoLog = NULL - - -cdef void* load_library(const int driver_ver) except* with gil: - cdef void* handle - for suffix in get_nvJitLink_dso_version_suffix(driver_ver): - so_name = "libnvJitLink.so" + (f".{suffix}" if suffix else suffix) - handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL) - if handle != NULL: - break - else: - err_msg = dlerror() - raise RuntimeError(f'Failed to dlopen libnvJitLink ({err_msg.decode()})') - return handle - - -cdef int 
_check_or_init_nvJitLink() except -1 nogil: - global __py_nvJitLink_init - if __py_nvJitLink_init: - return 0 - - # Load driver to check version - cdef void* handle = NULL - handle = dlopen('libcuda.so.1', RTLD_NOW | RTLD_GLOBAL) - if handle == NULL: - with gil: - err_msg = dlerror() - raise NotSupportedError(f'CUDA driver is not found ({err_msg.decode()})') - global __cuDriverGetVersion - if __cuDriverGetVersion == NULL: - __cuDriverGetVersion = dlsym(handle, "cuDriverGetVersion") - if __cuDriverGetVersion == NULL: - with gil: - raise RuntimeError('something went wrong') - cdef int err, driver_ver - err = (__cuDriverGetVersion)(&driver_ver) - if err != 0: - with gil: - raise RuntimeError('something went wrong') - #dlclose(handle) - handle = NULL - - # Load function - global __nvJitLinkCreate - __nvJitLinkCreate = dlsym(RTLD_DEFAULT, 'nvJitLinkCreate') - if __nvJitLinkCreate == NULL: - if handle == NULL: - handle = load_library(driver_ver) - __nvJitLinkCreate = dlsym(handle, 'nvJitLinkCreate') - - global __nvJitLinkDestroy - __nvJitLinkDestroy = dlsym(RTLD_DEFAULT, 'nvJitLinkDestroy') - if __nvJitLinkDestroy == NULL: - if handle == NULL: - handle = load_library(driver_ver) - __nvJitLinkDestroy = dlsym(handle, 'nvJitLinkDestroy') - - global __nvJitLinkAddData - __nvJitLinkAddData = dlsym(RTLD_DEFAULT, 'nvJitLinkAddData') - if __nvJitLinkAddData == NULL: - if handle == NULL: - handle = load_library(driver_ver) - __nvJitLinkAddData = dlsym(handle, 'nvJitLinkAddData') - - global __nvJitLinkAddFile - __nvJitLinkAddFile = dlsym(RTLD_DEFAULT, 'nvJitLinkAddFile') - if __nvJitLinkAddFile == NULL: - if handle == NULL: - handle = load_library(driver_ver) - __nvJitLinkAddFile = dlsym(handle, 'nvJitLinkAddFile') - - global __nvJitLinkComplete - __nvJitLinkComplete = dlsym(RTLD_DEFAULT, 'nvJitLinkComplete') - if __nvJitLinkComplete == NULL: - if handle == NULL: - handle = load_library(driver_ver) - __nvJitLinkComplete = dlsym(handle, 'nvJitLinkComplete') - - global 
__nvJitLinkGetLinkedCubinSize - __nvJitLinkGetLinkedCubinSize = dlsym(RTLD_DEFAULT, 'nvJitLinkGetLinkedCubinSize') - if __nvJitLinkGetLinkedCubinSize == NULL: - if handle == NULL: - handle = load_library(driver_ver) - __nvJitLinkGetLinkedCubinSize = dlsym(handle, 'nvJitLinkGetLinkedCubinSize') - - global __nvJitLinkGetLinkedCubin - __nvJitLinkGetLinkedCubin = dlsym(RTLD_DEFAULT, 'nvJitLinkGetLinkedCubin') - if __nvJitLinkGetLinkedCubin == NULL: - if handle == NULL: - handle = load_library(driver_ver) - __nvJitLinkGetLinkedCubin = dlsym(handle, 'nvJitLinkGetLinkedCubin') - - global __nvJitLinkGetLinkedPtxSize - __nvJitLinkGetLinkedPtxSize = dlsym(RTLD_DEFAULT, 'nvJitLinkGetLinkedPtxSize') - if __nvJitLinkGetLinkedPtxSize == NULL: - if handle == NULL: - handle = load_library(driver_ver) - __nvJitLinkGetLinkedPtxSize = dlsym(handle, 'nvJitLinkGetLinkedPtxSize') - - global __nvJitLinkGetLinkedPtx - __nvJitLinkGetLinkedPtx = dlsym(RTLD_DEFAULT, 'nvJitLinkGetLinkedPtx') - if __nvJitLinkGetLinkedPtx == NULL: - if handle == NULL: - handle = load_library(driver_ver) - __nvJitLinkGetLinkedPtx = dlsym(handle, 'nvJitLinkGetLinkedPtx') - - global __nvJitLinkGetErrorLogSize - __nvJitLinkGetErrorLogSize = dlsym(RTLD_DEFAULT, 'nvJitLinkGetErrorLogSize') - if __nvJitLinkGetErrorLogSize == NULL: - if handle == NULL: - handle = load_library(driver_ver) - __nvJitLinkGetErrorLogSize = dlsym(handle, 'nvJitLinkGetErrorLogSize') - - global __nvJitLinkGetErrorLog - __nvJitLinkGetErrorLog = dlsym(RTLD_DEFAULT, 'nvJitLinkGetErrorLog') - if __nvJitLinkGetErrorLog == NULL: - if handle == NULL: - handle = load_library(driver_ver) - __nvJitLinkGetErrorLog = dlsym(handle, 'nvJitLinkGetErrorLog') - - global __nvJitLinkGetInfoLogSize - __nvJitLinkGetInfoLogSize = dlsym(RTLD_DEFAULT, 'nvJitLinkGetInfoLogSize') - if __nvJitLinkGetInfoLogSize == NULL: - if handle == NULL: - handle = load_library(driver_ver) - __nvJitLinkGetInfoLogSize = dlsym(handle, 'nvJitLinkGetInfoLogSize') - - global 
__nvJitLinkGetInfoLog - __nvJitLinkGetInfoLog = dlsym(RTLD_DEFAULT, 'nvJitLinkGetInfoLog') - if __nvJitLinkGetInfoLog == NULL: - if handle == NULL: - handle = load_library(driver_ver) - __nvJitLinkGetInfoLog = dlsym(handle, 'nvJitLinkGetInfoLog') - - __py_nvJitLink_init = True - return 0 - - -cdef dict func_ptrs = None - - -cpdef dict _inspect_function_pointers(): - global func_ptrs - if func_ptrs is not None: - return func_ptrs - - _check_or_init_nvJitLink() - cdef dict data = {} - - global __nvJitLinkCreate - data["__nvJitLinkCreate"] = __nvJitLinkCreate - - global __nvJitLinkDestroy - data["__nvJitLinkDestroy"] = __nvJitLinkDestroy - - global __nvJitLinkAddData - data["__nvJitLinkAddData"] = __nvJitLinkAddData - - global __nvJitLinkAddFile - data["__nvJitLinkAddFile"] = __nvJitLinkAddFile - - global __nvJitLinkComplete - data["__nvJitLinkComplete"] = __nvJitLinkComplete - - global __nvJitLinkGetLinkedCubinSize - data["__nvJitLinkGetLinkedCubinSize"] = __nvJitLinkGetLinkedCubinSize - - global __nvJitLinkGetLinkedCubin - data["__nvJitLinkGetLinkedCubin"] = __nvJitLinkGetLinkedCubin - - global __nvJitLinkGetLinkedPtxSize - data["__nvJitLinkGetLinkedPtxSize"] = __nvJitLinkGetLinkedPtxSize - - global __nvJitLinkGetLinkedPtx - data["__nvJitLinkGetLinkedPtx"] = __nvJitLinkGetLinkedPtx - - global __nvJitLinkGetErrorLogSize - data["__nvJitLinkGetErrorLogSize"] = __nvJitLinkGetErrorLogSize - - global __nvJitLinkGetErrorLog - data["__nvJitLinkGetErrorLog"] = __nvJitLinkGetErrorLog - - global __nvJitLinkGetInfoLogSize - data["__nvJitLinkGetInfoLogSize"] = __nvJitLinkGetInfoLogSize - - global __nvJitLinkGetInfoLog - data["__nvJitLinkGetInfoLog"] = __nvJitLinkGetInfoLog - - func_ptrs = data - return data - - -cpdef _inspect_function_pointer(str name): - global func_ptrs - if func_ptrs is None: - func_ptrs = _inspect_function_pointers() - return func_ptrs[name] - - -############################################################################### -# Wrapper functions 
-############################################################################### - -cdef nvJitLinkResult _nvJitLinkCreate(nvJitLinkHandle* handle, uint32_t numOptions, const char** options) except* nogil: - global __nvJitLinkCreate - _check_or_init_nvJitLink() - if __nvJitLinkCreate == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkCreate is not found") - return (__nvJitLinkCreate)( - handle, numOptions, options) - - -cdef nvJitLinkResult _nvJitLinkDestroy(nvJitLinkHandle* handle) except* nogil: - global __nvJitLinkDestroy - _check_or_init_nvJitLink() - if __nvJitLinkDestroy == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkDestroy is not found") - return (__nvJitLinkDestroy)( - handle) - - -cdef nvJitLinkResult _nvJitLinkAddData(nvJitLinkHandle handle, nvJitLinkInputType inputType, const void* data, size_t size, const char* name) except* nogil: - global __nvJitLinkAddData - _check_or_init_nvJitLink() - if __nvJitLinkAddData == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkAddData is not found") - return (__nvJitLinkAddData)( - handle, inputType, data, size, name) - - -cdef nvJitLinkResult _nvJitLinkAddFile(nvJitLinkHandle handle, nvJitLinkInputType inputType, const char* fileName) except* nogil: - global __nvJitLinkAddFile - _check_or_init_nvJitLink() - if __nvJitLinkAddFile == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkAddFile is not found") - return (__nvJitLinkAddFile)( - handle, inputType, fileName) - - -cdef nvJitLinkResult _nvJitLinkComplete(nvJitLinkHandle handle) except* nogil: - global __nvJitLinkComplete - _check_or_init_nvJitLink() - if __nvJitLinkComplete == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkComplete is not found") - return (__nvJitLinkComplete)( - handle) - - -cdef nvJitLinkResult _nvJitLinkGetLinkedCubinSize(nvJitLinkHandle handle, size_t* size) except* nogil: - global __nvJitLinkGetLinkedCubinSize - _check_or_init_nvJitLink() - if 
__nvJitLinkGetLinkedCubinSize == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkGetLinkedCubinSize is not found") - return (__nvJitLinkGetLinkedCubinSize)( - handle, size) - - -cdef nvJitLinkResult _nvJitLinkGetLinkedCubin(nvJitLinkHandle handle, void* cubin) except* nogil: - global __nvJitLinkGetLinkedCubin - _check_or_init_nvJitLink() - if __nvJitLinkGetLinkedCubin == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkGetLinkedCubin is not found") - return (__nvJitLinkGetLinkedCubin)( - handle, cubin) - - -cdef nvJitLinkResult _nvJitLinkGetLinkedPtxSize(nvJitLinkHandle handle, size_t* size) except* nogil: - global __nvJitLinkGetLinkedPtxSize - _check_or_init_nvJitLink() - if __nvJitLinkGetLinkedPtxSize == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkGetLinkedPtxSize is not found") - return (__nvJitLinkGetLinkedPtxSize)( - handle, size) - - -cdef nvJitLinkResult _nvJitLinkGetLinkedPtx(nvJitLinkHandle handle, char* ptx) except* nogil: - global __nvJitLinkGetLinkedPtx - _check_or_init_nvJitLink() - if __nvJitLinkGetLinkedPtx == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkGetLinkedPtx is not found") - return (__nvJitLinkGetLinkedPtx)( - handle, ptx) - - -cdef nvJitLinkResult _nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t* size) except* nogil: - global __nvJitLinkGetErrorLogSize - _check_or_init_nvJitLink() - if __nvJitLinkGetErrorLogSize == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkGetErrorLogSize is not found") - return (__nvJitLinkGetErrorLogSize)( - handle, size) - - -cdef nvJitLinkResult _nvJitLinkGetErrorLog(nvJitLinkHandle handle, char* log) except* nogil: - global __nvJitLinkGetErrorLog - _check_or_init_nvJitLink() - if __nvJitLinkGetErrorLog == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkGetErrorLog is not found") - return (__nvJitLinkGetErrorLog)( - handle, log) - - -cdef nvJitLinkResult 
_nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t* size) except* nogil: - global __nvJitLinkGetInfoLogSize - _check_or_init_nvJitLink() - if __nvJitLinkGetInfoLogSize == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkGetInfoLogSize is not found") - return (__nvJitLinkGetInfoLogSize)( - handle, size) - - -cdef nvJitLinkResult _nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) except* nogil: - global __nvJitLinkGetInfoLog - _check_or_init_nvJitLink() - if __nvJitLinkGetInfoLog == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkGetInfoLog is not found") - return (__nvJitLinkGetInfoLog)( - handle, log) diff --git a/cuda_bindings/cuda/bindings/_bindings/nvJitLink_windows.pyx b/cuda_bindings/cuda/bindings/_bindings/nvJitLink_windows.pyx deleted file mode 100644 index 8856b59ca..000000000 --- a/cuda_bindings/cuda/bindings/_bindings/nvJitLink_windows.pyx +++ /dev/null @@ -1,393 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. -# -# SPDX-License-Identifier: Apache-2.0 -# -# This code was automatically generated across versions from 12.0.1 to 12.4.1. Do not modify it directly. 
- -from libc.stdint cimport intptr_t - -from .utils cimport get_nvJitLink_dso_version_suffix - -import os -import site - -import win32api - -from .utils import FunctionNotFoundError, NotSupportedError - - -############################################################################### -# Wrapper init -############################################################################### - -LOAD_LIBRARY_SEARCH_SYSTEM32 = 0x00000800 -LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000 -LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100 -cdef bint __py_nvJitLink_init = False -cdef void* __cuDriverGetVersion = NULL - -cdef void* __nvJitLinkCreate = NULL -cdef void* __nvJitLinkDestroy = NULL -cdef void* __nvJitLinkAddData = NULL -cdef void* __nvJitLinkAddFile = NULL -cdef void* __nvJitLinkComplete = NULL -cdef void* __nvJitLinkGetLinkedCubinSize = NULL -cdef void* __nvJitLinkGetLinkedCubin = NULL -cdef void* __nvJitLinkGetLinkedPtxSize = NULL -cdef void* __nvJitLinkGetLinkedPtx = NULL -cdef void* __nvJitLinkGetErrorLogSize = NULL -cdef void* __nvJitLinkGetErrorLog = NULL -cdef void* __nvJitLinkGetInfoLogSize = NULL -cdef void* __nvJitLinkGetInfoLog = NULL - - -cdef inline list get_site_packages(): - return [site.getusersitepackages()] + site.getsitepackages() - - -cdef load_library(const int driver_ver): - handle = 0 - - for suffix in get_nvJitLink_dso_version_suffix(driver_ver): - if len(suffix) == 0: - continue - dll_name = f"nvJitLink64_{suffix}.dll" - - # First check if the DLL has been loaded by 3rd parties - try: - handle = win32api.GetModuleHandle(dll_name) - except: - pass - else: - break - - # Next, check if DLLs are installed via pip - for sp in get_site_packages(): - mod_path = os.path.join(sp, "nvidia", "nvJitLink", "bin") - if not os.path.isdir(mod_path): - continue - os.add_dll_directory(mod_path) - try: - handle = win32api.LoadLibraryEx( - # Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path... 
- os.path.join(mod_path, dll_name), - 0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR) - except: - pass - else: - break - - # Finally, try default search - try: - handle = win32api.LoadLibrary(dll_name) - except: - pass - else: - break - else: - raise RuntimeError('Failed to load nvJitLink') - - assert handle != 0 - return handle - - -cdef int _check_or_init_nvJitLink() except -1 nogil: - global __py_nvJitLink_init - if __py_nvJitLink_init: - return 0 - - cdef int err, driver_ver - with gil: - # Load driver to check version - try: - handle = win32api.LoadLibraryEx("nvcuda.dll", 0, LOAD_LIBRARY_SEARCH_SYSTEM32) - except Exception as e: - raise NotSupportedError(f'CUDA driver is not found ({e})') - global __cuDriverGetVersion - if __cuDriverGetVersion == NULL: - __cuDriverGetVersion = win32api.GetProcAddress(handle, 'cuDriverGetVersion') - if __cuDriverGetVersion == NULL: - raise RuntimeError('something went wrong') - err = (__cuDriverGetVersion)(&driver_ver) - if err != 0: - raise RuntimeError('something went wrong') - - # Load library - handle = load_library(driver_ver) - - # Load function - global __nvJitLinkCreate - try: - __nvJitLinkCreate = win32api.GetProcAddress(handle, 'nvJitLinkCreate') - except: - pass - - global __nvJitLinkDestroy - try: - __nvJitLinkDestroy = win32api.GetProcAddress(handle, 'nvJitLinkDestroy') - except: - pass - - global __nvJitLinkAddData - try: - __nvJitLinkAddData = win32api.GetProcAddress(handle, 'nvJitLinkAddData') - except: - pass - - global __nvJitLinkAddFile - try: - __nvJitLinkAddFile = win32api.GetProcAddress(handle, 'nvJitLinkAddFile') - except: - pass - - global __nvJitLinkComplete - try: - __nvJitLinkComplete = win32api.GetProcAddress(handle, 'nvJitLinkComplete') - except: - pass - - global __nvJitLinkGetLinkedCubinSize - try: - __nvJitLinkGetLinkedCubinSize = win32api.GetProcAddress(handle, 'nvJitLinkGetLinkedCubinSize') - except: - pass - - global __nvJitLinkGetLinkedCubin - try: - 
__nvJitLinkGetLinkedCubin = win32api.GetProcAddress(handle, 'nvJitLinkGetLinkedCubin') - except: - pass - - global __nvJitLinkGetLinkedPtxSize - try: - __nvJitLinkGetLinkedPtxSize = win32api.GetProcAddress(handle, 'nvJitLinkGetLinkedPtxSize') - except: - pass - - global __nvJitLinkGetLinkedPtx - try: - __nvJitLinkGetLinkedPtx = win32api.GetProcAddress(handle, 'nvJitLinkGetLinkedPtx') - except: - pass - - global __nvJitLinkGetErrorLogSize - try: - __nvJitLinkGetErrorLogSize = win32api.GetProcAddress(handle, 'nvJitLinkGetErrorLogSize') - except: - pass - - global __nvJitLinkGetErrorLog - try: - __nvJitLinkGetErrorLog = win32api.GetProcAddress(handle, 'nvJitLinkGetErrorLog') - except: - pass - - global __nvJitLinkGetInfoLogSize - try: - __nvJitLinkGetInfoLogSize = win32api.GetProcAddress(handle, 'nvJitLinkGetInfoLogSize') - except: - pass - - global __nvJitLinkGetInfoLog - try: - __nvJitLinkGetInfoLog = win32api.GetProcAddress(handle, 'nvJitLinkGetInfoLog') - except: - pass - - __py_nvJitLink_init = True - return 0 - - -cdef dict func_ptrs = None - - -cpdef dict _inspect_function_pointers(): - global func_ptrs - if func_ptrs is not None: - return func_ptrs - - _check_or_init_nvJitLink() - cdef dict data = {} - - global __nvJitLinkCreate - data["__nvJitLinkCreate"] = __nvJitLinkCreate - - global __nvJitLinkDestroy - data["__nvJitLinkDestroy"] = __nvJitLinkDestroy - - global __nvJitLinkAddData - data["__nvJitLinkAddData"] = __nvJitLinkAddData - - global __nvJitLinkAddFile - data["__nvJitLinkAddFile"] = __nvJitLinkAddFile - - global __nvJitLinkComplete - data["__nvJitLinkComplete"] = __nvJitLinkComplete - - global __nvJitLinkGetLinkedCubinSize - data["__nvJitLinkGetLinkedCubinSize"] = __nvJitLinkGetLinkedCubinSize - - global __nvJitLinkGetLinkedCubin - data["__nvJitLinkGetLinkedCubin"] = __nvJitLinkGetLinkedCubin - - global __nvJitLinkGetLinkedPtxSize - data["__nvJitLinkGetLinkedPtxSize"] = __nvJitLinkGetLinkedPtxSize - - global __nvJitLinkGetLinkedPtx - 
data["__nvJitLinkGetLinkedPtx"] = __nvJitLinkGetLinkedPtx - - global __nvJitLinkGetErrorLogSize - data["__nvJitLinkGetErrorLogSize"] = __nvJitLinkGetErrorLogSize - - global __nvJitLinkGetErrorLog - data["__nvJitLinkGetErrorLog"] = __nvJitLinkGetErrorLog - - global __nvJitLinkGetInfoLogSize - data["__nvJitLinkGetInfoLogSize"] = __nvJitLinkGetInfoLogSize - - global __nvJitLinkGetInfoLog - data["__nvJitLinkGetInfoLog"] = __nvJitLinkGetInfoLog - - func_ptrs = data - return data - - -cpdef _inspect_function_pointer(str name): - global func_ptrs - if func_ptrs is None: - func_ptrs = _inspect_function_pointers() - return func_ptrs[name] - - -############################################################################### -# Wrapper functions -############################################################################### - -cdef nvJitLinkResult _nvJitLinkCreate(nvJitLinkHandle* handle, uint32_t numOptions, const char** options) except* nogil: - global __nvJitLinkCreate - _check_or_init_nvJitLink() - if __nvJitLinkCreate == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkCreate is not found") - return (__nvJitLinkCreate)( - handle, numOptions, options) - - -cdef nvJitLinkResult _nvJitLinkDestroy(nvJitLinkHandle* handle) except* nogil: - global __nvJitLinkDestroy - _check_or_init_nvJitLink() - if __nvJitLinkDestroy == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkDestroy is not found") - return (__nvJitLinkDestroy)( - handle) - - -cdef nvJitLinkResult _nvJitLinkAddData(nvJitLinkHandle handle, nvJitLinkInputType inputType, const void* data, size_t size, const char* name) except* nogil: - global __nvJitLinkAddData - _check_or_init_nvJitLink() - if __nvJitLinkAddData == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkAddData is not found") - return (__nvJitLinkAddData)( - handle, inputType, data, size, name) - - -cdef nvJitLinkResult _nvJitLinkAddFile(nvJitLinkHandle handle, nvJitLinkInputType inputType, const char* 
fileName) except* nogil: - global __nvJitLinkAddFile - _check_or_init_nvJitLink() - if __nvJitLinkAddFile == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkAddFile is not found") - return (__nvJitLinkAddFile)( - handle, inputType, fileName) - - -cdef nvJitLinkResult _nvJitLinkComplete(nvJitLinkHandle handle) except* nogil: - global __nvJitLinkComplete - _check_or_init_nvJitLink() - if __nvJitLinkComplete == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkComplete is not found") - return (__nvJitLinkComplete)( - handle) - - -cdef nvJitLinkResult _nvJitLinkGetLinkedCubinSize(nvJitLinkHandle handle, size_t* size) except* nogil: - global __nvJitLinkGetLinkedCubinSize - _check_or_init_nvJitLink() - if __nvJitLinkGetLinkedCubinSize == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkGetLinkedCubinSize is not found") - return (__nvJitLinkGetLinkedCubinSize)( - handle, size) - - -cdef nvJitLinkResult _nvJitLinkGetLinkedCubin(nvJitLinkHandle handle, void* cubin) except* nogil: - global __nvJitLinkGetLinkedCubin - _check_or_init_nvJitLink() - if __nvJitLinkGetLinkedCubin == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkGetLinkedCubin is not found") - return (__nvJitLinkGetLinkedCubin)( - handle, cubin) - - -cdef nvJitLinkResult _nvJitLinkGetLinkedPtxSize(nvJitLinkHandle handle, size_t* size) except* nogil: - global __nvJitLinkGetLinkedPtxSize - _check_or_init_nvJitLink() - if __nvJitLinkGetLinkedPtxSize == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkGetLinkedPtxSize is not found") - return (__nvJitLinkGetLinkedPtxSize)( - handle, size) - - -cdef nvJitLinkResult _nvJitLinkGetLinkedPtx(nvJitLinkHandle handle, char* ptx) except* nogil: - global __nvJitLinkGetLinkedPtx - _check_or_init_nvJitLink() - if __nvJitLinkGetLinkedPtx == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkGetLinkedPtx is not found") - return (__nvJitLinkGetLinkedPtx)( - handle, ptx) - - 
-cdef nvJitLinkResult _nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t* size) except* nogil: - global __nvJitLinkGetErrorLogSize - _check_or_init_nvJitLink() - if __nvJitLinkGetErrorLogSize == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkGetErrorLogSize is not found") - return (__nvJitLinkGetErrorLogSize)( - handle, size) - - -cdef nvJitLinkResult _nvJitLinkGetErrorLog(nvJitLinkHandle handle, char* log) except* nogil: - global __nvJitLinkGetErrorLog - _check_or_init_nvJitLink() - if __nvJitLinkGetErrorLog == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkGetErrorLog is not found") - return (__nvJitLinkGetErrorLog)( - handle, log) - - -cdef nvJitLinkResult _nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t* size) except* nogil: - global __nvJitLinkGetInfoLogSize - _check_or_init_nvJitLink() - if __nvJitLinkGetInfoLogSize == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkGetInfoLogSize is not found") - return (__nvJitLinkGetInfoLogSize)( - handle, size) - - -cdef nvJitLinkResult _nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) except* nogil: - global __nvJitLinkGetInfoLog - _check_or_init_nvJitLink() - if __nvJitLinkGetInfoLog == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkGetInfoLog is not found") - return (__nvJitLinkGetInfoLog)( - handle, log) diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink.pyx deleted file mode 100644 index ff7a6ca3a..000000000 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink.pyx +++ /dev/null @@ -1,382 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. -# -# SPDX-License-Identifier: Apache-2.0 -# -# This code was automatically generated across versions from 12.0.76 to 12.6.77. Do not modify it directly. 
- -from libc.stdint cimport intptr_t - -from .utils cimport get_nvjitlink_dso_version_suffix - -from .utils import FunctionNotFoundError, NotSupportedError - - -############################################################################### -# Extern -############################################################################### - -cdef extern from "" nogil: - void* dlopen(const char*, int) - char* dlerror() - void* dlsym(void*, const char*) - int dlclose(void*) - - enum: - RTLD_LAZY - RTLD_NOW - RTLD_GLOBAL - RTLD_LOCAL - - const void* RTLD_DEFAULT 'RTLD_DEFAULT' - - -############################################################################### -# Wrapper init -############################################################################### - -cdef bint __py_nvjitlink_init = False -cdef void* __cuDriverGetVersion = NULL - -cdef void* __nvJitLinkCreate = NULL -cdef void* __nvJitLinkDestroy = NULL -cdef void* __nvJitLinkAddData = NULL -cdef void* __nvJitLinkAddFile = NULL -cdef void* __nvJitLinkComplete = NULL -cdef void* __nvJitLinkGetLinkedCubinSize = NULL -cdef void* __nvJitLinkGetLinkedCubin = NULL -cdef void* __nvJitLinkGetLinkedPtxSize = NULL -cdef void* __nvJitLinkGetLinkedPtx = NULL -cdef void* __nvJitLinkGetErrorLogSize = NULL -cdef void* __nvJitLinkGetErrorLog = NULL -cdef void* __nvJitLinkGetInfoLogSize = NULL -cdef void* __nvJitLinkGetInfoLog = NULL - - -cdef void* load_library(const int driver_ver) except* with gil: - cdef void* handle - for suffix in get_nvjitlink_dso_version_suffix(driver_ver): - so_name = "libnvjitlink.so" + (f".{suffix}" if suffix else suffix) - handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL) - if handle != NULL: - break - else: - err_msg = dlerror() - raise RuntimeError(f'Failed to dlopen libnvjitlink ({err_msg.decode()})') - return handle - - -cdef int _check_or_init_nvjitlink() except -1 nogil: - global __py_nvjitlink_init - if __py_nvjitlink_init: - return 0 - - # Load driver to check version - cdef void* handle = 
NULL - handle = dlopen('libcuda.so.1', RTLD_NOW | RTLD_GLOBAL) - if handle == NULL: - with gil: - err_msg = dlerror() - raise NotSupportedError(f'CUDA driver is not found ({err_msg.decode()})') - global __cuDriverGetVersion - if __cuDriverGetVersion == NULL: - __cuDriverGetVersion = dlsym(handle, "cuDriverGetVersion") - if __cuDriverGetVersion == NULL: - with gil: - raise RuntimeError('something went wrong') - cdef int err, driver_ver - err = (__cuDriverGetVersion)(&driver_ver) - if err != 0: - with gil: - raise RuntimeError('something went wrong') - #dlclose(handle) - handle = NULL - - # Load function - global __nvJitLinkCreate - __nvJitLinkCreate = dlsym(RTLD_DEFAULT, 'nvJitLinkCreate') - if __nvJitLinkCreate == NULL: - if handle == NULL: - handle = load_library(driver_ver) - __nvJitLinkCreate = dlsym(handle, 'nvJitLinkCreate') - - global __nvJitLinkDestroy - __nvJitLinkDestroy = dlsym(RTLD_DEFAULT, 'nvJitLinkDestroy') - if __nvJitLinkDestroy == NULL: - if handle == NULL: - handle = load_library(driver_ver) - __nvJitLinkDestroy = dlsym(handle, 'nvJitLinkDestroy') - - global __nvJitLinkAddData - __nvJitLinkAddData = dlsym(RTLD_DEFAULT, 'nvJitLinkAddData') - if __nvJitLinkAddData == NULL: - if handle == NULL: - handle = load_library(driver_ver) - __nvJitLinkAddData = dlsym(handle, 'nvJitLinkAddData') - - global __nvJitLinkAddFile - __nvJitLinkAddFile = dlsym(RTLD_DEFAULT, 'nvJitLinkAddFile') - if __nvJitLinkAddFile == NULL: - if handle == NULL: - handle = load_library(driver_ver) - __nvJitLinkAddFile = dlsym(handle, 'nvJitLinkAddFile') - - global __nvJitLinkComplete - __nvJitLinkComplete = dlsym(RTLD_DEFAULT, 'nvJitLinkComplete') - if __nvJitLinkComplete == NULL: - if handle == NULL: - handle = load_library(driver_ver) - __nvJitLinkComplete = dlsym(handle, 'nvJitLinkComplete') - - global __nvJitLinkGetLinkedCubinSize - __nvJitLinkGetLinkedCubinSize = dlsym(RTLD_DEFAULT, 'nvJitLinkGetLinkedCubinSize') - if __nvJitLinkGetLinkedCubinSize == NULL: - if handle == NULL: 
- handle = load_library(driver_ver) - __nvJitLinkGetLinkedCubinSize = dlsym(handle, 'nvJitLinkGetLinkedCubinSize') - - global __nvJitLinkGetLinkedCubin - __nvJitLinkGetLinkedCubin = dlsym(RTLD_DEFAULT, 'nvJitLinkGetLinkedCubin') - if __nvJitLinkGetLinkedCubin == NULL: - if handle == NULL: - handle = load_library(driver_ver) - __nvJitLinkGetLinkedCubin = dlsym(handle, 'nvJitLinkGetLinkedCubin') - - global __nvJitLinkGetLinkedPtxSize - __nvJitLinkGetLinkedPtxSize = dlsym(RTLD_DEFAULT, 'nvJitLinkGetLinkedPtxSize') - if __nvJitLinkGetLinkedPtxSize == NULL: - if handle == NULL: - handle = load_library(driver_ver) - __nvJitLinkGetLinkedPtxSize = dlsym(handle, 'nvJitLinkGetLinkedPtxSize') - - global __nvJitLinkGetLinkedPtx - __nvJitLinkGetLinkedPtx = dlsym(RTLD_DEFAULT, 'nvJitLinkGetLinkedPtx') - if __nvJitLinkGetLinkedPtx == NULL: - if handle == NULL: - handle = load_library(driver_ver) - __nvJitLinkGetLinkedPtx = dlsym(handle, 'nvJitLinkGetLinkedPtx') - - global __nvJitLinkGetErrorLogSize - __nvJitLinkGetErrorLogSize = dlsym(RTLD_DEFAULT, 'nvJitLinkGetErrorLogSize') - if __nvJitLinkGetErrorLogSize == NULL: - if handle == NULL: - handle = load_library(driver_ver) - __nvJitLinkGetErrorLogSize = dlsym(handle, 'nvJitLinkGetErrorLogSize') - - global __nvJitLinkGetErrorLog - __nvJitLinkGetErrorLog = dlsym(RTLD_DEFAULT, 'nvJitLinkGetErrorLog') - if __nvJitLinkGetErrorLog == NULL: - if handle == NULL: - handle = load_library(driver_ver) - __nvJitLinkGetErrorLog = dlsym(handle, 'nvJitLinkGetErrorLog') - - global __nvJitLinkGetInfoLogSize - __nvJitLinkGetInfoLogSize = dlsym(RTLD_DEFAULT, 'nvJitLinkGetInfoLogSize') - if __nvJitLinkGetInfoLogSize == NULL: - if handle == NULL: - handle = load_library(driver_ver) - __nvJitLinkGetInfoLogSize = dlsym(handle, 'nvJitLinkGetInfoLogSize') - - global __nvJitLinkGetInfoLog - __nvJitLinkGetInfoLog = dlsym(RTLD_DEFAULT, 'nvJitLinkGetInfoLog') - if __nvJitLinkGetInfoLog == NULL: - if handle == NULL: - handle = load_library(driver_ver) - 
__nvJitLinkGetInfoLog = dlsym(handle, 'nvJitLinkGetInfoLog') - - __py_nvjitlink_init = True - return 0 - - -cdef dict func_ptrs = None - - -cpdef dict _inspect_function_pointers(): - global func_ptrs - if func_ptrs is not None: - return func_ptrs - - _check_or_init_nvjitlink() - cdef dict data = {} - - global __nvJitLinkCreate - data["__nvJitLinkCreate"] = __nvJitLinkCreate - - global __nvJitLinkDestroy - data["__nvJitLinkDestroy"] = __nvJitLinkDestroy - - global __nvJitLinkAddData - data["__nvJitLinkAddData"] = __nvJitLinkAddData - - global __nvJitLinkAddFile - data["__nvJitLinkAddFile"] = __nvJitLinkAddFile - - global __nvJitLinkComplete - data["__nvJitLinkComplete"] = __nvJitLinkComplete - - global __nvJitLinkGetLinkedCubinSize - data["__nvJitLinkGetLinkedCubinSize"] = __nvJitLinkGetLinkedCubinSize - - global __nvJitLinkGetLinkedCubin - data["__nvJitLinkGetLinkedCubin"] = __nvJitLinkGetLinkedCubin - - global __nvJitLinkGetLinkedPtxSize - data["__nvJitLinkGetLinkedPtxSize"] = __nvJitLinkGetLinkedPtxSize - - global __nvJitLinkGetLinkedPtx - data["__nvJitLinkGetLinkedPtx"] = __nvJitLinkGetLinkedPtx - - global __nvJitLinkGetErrorLogSize - data["__nvJitLinkGetErrorLogSize"] = __nvJitLinkGetErrorLogSize - - global __nvJitLinkGetErrorLog - data["__nvJitLinkGetErrorLog"] = __nvJitLinkGetErrorLog - - global __nvJitLinkGetInfoLogSize - data["__nvJitLinkGetInfoLogSize"] = __nvJitLinkGetInfoLogSize - - global __nvJitLinkGetInfoLog - data["__nvJitLinkGetInfoLog"] = __nvJitLinkGetInfoLog - - func_ptrs = data - return data - - -cpdef _inspect_function_pointer(str name): - global func_ptrs - if func_ptrs is None: - func_ptrs = _inspect_function_pointers() - return func_ptrs[name] - - -############################################################################### -# Wrapper functions -############################################################################### - -cdef nvJitLinkResult _nvJitLinkCreate(nvJitLinkHandle* handle, uint32_t numOptions, const char** options) except* 
nogil: - global __nvJitLinkCreate - _check_or_init_nvjitlink() - if __nvJitLinkCreate == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkCreate is not found") - return (__nvJitLinkCreate)( - handle, numOptions, options) - - -cdef nvJitLinkResult _nvJitLinkDestroy(nvJitLinkHandle* handle) except* nogil: - global __nvJitLinkDestroy - _check_or_init_nvjitlink() - if __nvJitLinkDestroy == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkDestroy is not found") - return (__nvJitLinkDestroy)( - handle) - - -cdef nvJitLinkResult _nvJitLinkAddData(nvJitLinkHandle handle, nvJitLinkInputType inputType, const void* data, size_t size, const char* name) except* nogil: - global __nvJitLinkAddData - _check_or_init_nvjitlink() - if __nvJitLinkAddData == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkAddData is not found") - return (__nvJitLinkAddData)( - handle, inputType, data, size, name) - - -cdef nvJitLinkResult _nvJitLinkAddFile(nvJitLinkHandle handle, nvJitLinkInputType inputType, const char* fileName) except* nogil: - global __nvJitLinkAddFile - _check_or_init_nvjitlink() - if __nvJitLinkAddFile == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkAddFile is not found") - return (__nvJitLinkAddFile)( - handle, inputType, fileName) - - -cdef nvJitLinkResult _nvJitLinkComplete(nvJitLinkHandle handle) except* nogil: - global __nvJitLinkComplete - _check_or_init_nvjitlink() - if __nvJitLinkComplete == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkComplete is not found") - return (__nvJitLinkComplete)( - handle) - - -cdef nvJitLinkResult _nvJitLinkGetLinkedCubinSize(nvJitLinkHandle handle, size_t* size) except* nogil: - global __nvJitLinkGetLinkedCubinSize - _check_or_init_nvjitlink() - if __nvJitLinkGetLinkedCubinSize == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkGetLinkedCubinSize is not found") - return (__nvJitLinkGetLinkedCubinSize)( - handle, size) - - 
-cdef nvJitLinkResult _nvJitLinkGetLinkedCubin(nvJitLinkHandle handle, void* cubin) except* nogil: - global __nvJitLinkGetLinkedCubin - _check_or_init_nvjitlink() - if __nvJitLinkGetLinkedCubin == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkGetLinkedCubin is not found") - return (__nvJitLinkGetLinkedCubin)( - handle, cubin) - - -cdef nvJitLinkResult _nvJitLinkGetLinkedPtxSize(nvJitLinkHandle handle, size_t* size) except* nogil: - global __nvJitLinkGetLinkedPtxSize - _check_or_init_nvjitlink() - if __nvJitLinkGetLinkedPtxSize == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkGetLinkedPtxSize is not found") - return (__nvJitLinkGetLinkedPtxSize)( - handle, size) - - -cdef nvJitLinkResult _nvJitLinkGetLinkedPtx(nvJitLinkHandle handle, char* ptx) except* nogil: - global __nvJitLinkGetLinkedPtx - _check_or_init_nvjitlink() - if __nvJitLinkGetLinkedPtx == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkGetLinkedPtx is not found") - return (__nvJitLinkGetLinkedPtx)( - handle, ptx) - - -cdef nvJitLinkResult _nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t* size) except* nogil: - global __nvJitLinkGetErrorLogSize - _check_or_init_nvjitlink() - if __nvJitLinkGetErrorLogSize == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkGetErrorLogSize is not found") - return (__nvJitLinkGetErrorLogSize)( - handle, size) - - -cdef nvJitLinkResult _nvJitLinkGetErrorLog(nvJitLinkHandle handle, char* log) except* nogil: - global __nvJitLinkGetErrorLog - _check_or_init_nvjitlink() - if __nvJitLinkGetErrorLog == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkGetErrorLog is not found") - return (__nvJitLinkGetErrorLog)( - handle, log) - - -cdef nvJitLinkResult _nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t* size) except* nogil: - global __nvJitLinkGetInfoLogSize - _check_or_init_nvjitlink() - if __nvJitLinkGetInfoLogSize == NULL: - with gil: - raise 
FunctionNotFoundError("function nvJitLinkGetInfoLogSize is not found") - return (__nvJitLinkGetInfoLogSize)( - handle, size) - - -cdef nvJitLinkResult _nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) except* nogil: - global __nvJitLinkGetInfoLog - _check_or_init_nvjitlink() - if __nvJitLinkGetInfoLog == NULL: - with gil: - raise FunctionNotFoundError("function nvJitLinkGetInfoLog is not found") - return (__nvJitLinkGetInfoLog)( - handle, log) diff --git a/cuda_bindings/setup.py b/cuda_bindings/setup.py index f0aaee771..592464487 100644 --- a/cuda_bindings/setup.py +++ b/cuda_bindings/setup.py @@ -61,7 +61,7 @@ 'cuda_gl_interop.h', 'cuda_vdpau_interop.h'], 'nvrtc' : ['nvrtc.h'], - 'nvJitLink' : ['nvJitLink.h'],} + 'nvJitLink' : ['nvJitLink.h']} replace = {' __device_builtin__ ':' ', 'CUDARTAPI ':' ', @@ -93,13 +93,16 @@ break if not os.path.exists(path): print(f'Missing header {header}') + print(f'Parsing {library} headers') parser = CParser(header_paths, cache='./cache_{}'.format(library.split('.')[0]) if PARSER_CACHING else None, replace=replace) + if library == 'driver': CUDA_VERSION = parser.defs['macros']['CUDA_VERSION'] if 'CUDA_VERSION' in parser.defs['macros'] else 'Unknown' print(f'Found CUDA_VERSION: {CUDA_VERSION}') + # Combine types with others since they sometimes get tangled found_types += {key for key in parser.defs['types']} found_types += {key for key in parser.defs['structs']} @@ -109,13 +112,16 @@ found_types += {key for key in parser.defs['enums']} found_functions += {key for key in parser.defs['functions']} found_values += {key for key in parser.defs['values']} + if len(found_functions) == 0: raise RuntimeError(f'Parser found no functions. Is CUDA_HOME setup correctly? 
(CUDA_HOME="{CUDA_HOME}")') + # Unwrap struct and union members def unwrapMembers(found_dict): for key in found_dict: members = [var for var, _, _ in found_dict[key]['members']] found_dict[key]['members'] = members + unwrapMembers(found_structs) unwrapMembers(found_unions) diff --git a/cuda_bindings/tests/test_nvJitLink.py b/cuda_bindings/tests/test_nvJitLink.py deleted file mode 100644 index f566ae7c6..000000000 --- a/cuda_bindings/tests/test_nvJitLink.py +++ /dev/null @@ -1,164 +0,0 @@ -import pytest -from cuda import nvJitLink - -def test_create_no_arch_error(): - # nvjitlink expects at least the architecture to be specified. - with pytest.raises(RuntimeError, match="NVJITLINK_ERROR_MISSING_ARCH error"): - nvJitLink.create() - - -def test_invalid_arch_error(): - # sm_XX is not a valid architecture - with pytest.raises(RuntimeError, match="NVJITLINK_ERROR_UNRECOGNIZED_OPTION error"): - nvJitLink.create("-arch=sm_XX") - - -def test_unrecognized_option_error(): - with pytest.raises(RuntimeError, match="NVJITLINK_ERROR_UNRECOGNIZED_OPTION error"): - nvJitLink.create("-fictitious_option") - - -def test_invalid_option_type_error(): - with pytest.raises(TypeError, match="Expecting only strings"): - nvJitLink.create("-arch", 53) - - -def test_create_and_destroy(): - handle = nvJitLink.create("-arch=sm_53") - assert handle != 0 - nvJitLink.destroy(handle) - - -def test_complete_empty(): - handle = nvJitLink.create("-arch=sm_75") - nvJitLink.complete(handle) - nvJitLink.destroy(handle) - - -@pytest.mark.parametrize( - "input_file,input_type", - [ - ("device_functions_cubin", nvJitLink.InputType.CUBIN), - ("device_functions_fatbin", InputType.FATBIN), - ("device_functions_ptx", InputType.PTX), - ("device_functions_object", InputType.OBJECT), - ("device_functions_archive", InputType.LIBRARY), - ], -) -def test_add_file(input_file, input_type, gpu_arch_flag, request): - filename, data = request.getfixturevalue(input_file) - - handle = nvJitLink.create(gpu_arch_flag) - 
nvJitLink.add_data(handle, input_type.value, data, filename) - nvJitLink.destroy(handle) - - -# We test the LTO input case separately as it requires the `-lto` flag. The -# OBJECT input type is used because the LTO-IR container is packaged in an ELF -# object when produced by NVCC. -def test_add_file_lto(device_functions_ltoir_object, gpu_arch_flag): - filename, data = device_functions_ltoir_object - - handle = nvJitLink.create(gpu_arch_flag, "-lto") - nvJitLink.add_data(handle, InputType.OBJECT.value, data, filename) - nvJitLink.destroy(handle) - - -def test_get_error_log(undefined_extern_cubin, gpu_arch_flag): - handle = nvJitLink.create(gpu_arch_flag) - filename, data = undefined_extern_cubin - input_type = InputType.CUBIN.value - nvJitLink.add_data(handle, input_type, data, filename) - with pytest.raises(RuntimeError): - nvJitLink.complete(handle) - error_log = nvJitLink.get_error_log(handle) - nvJitLink.destroy(handle) - assert ( - "Undefined reference to '_Z5undefff' " - "in 'undefined_extern.cubin'" in error_log - ) - - -def test_get_info_log(device_functions_cubin, gpu_arch_flag): - handle = nvJitLink.create(gpu_arch_flag) - filename, data = device_functions_cubin - input_type = InputType.CUBIN.value - nvJitLink.add_data(handle, input_type, data, filename) - nvJitLink.complete(handle) - info_log = nvJitLink.get_info_log(handle) - nvJitLink.destroy(handle) - # Info log is empty - assert "" == info_log - - -def test_get_linked_cubin(device_functions_cubin, gpu_arch_flag): - handle = nvJitLink.create(gpu_arch_flag) - filename, data = device_functions_cubin - input_type = InputType.CUBIN.value - nvJitLink.add_data(handle, input_type, data, filename) - nvJitLink.complete(handle) - cubin = nvJitLink.get_linked_cubin(handle) - nvJitLink.destroy(handle) - - # Just check we got something that looks like an ELF - assert cubin[:4] == b"\x7fELF" - - -def test_get_linked_cubin_link_not_complete_error( - device_functions_cubin, gpu_arch_flag -): - handle = 
nvJitLink.create(gpu_arch_flag) - filename, data = device_functions_cubin - input_type = InputType.CUBIN.value - nvJitLink.add_data(handle, input_type, data, filename) - with pytest.raises(RuntimeError, match="NVJITLINK_ERROR_INTERNAL error"): - nvJitLink.get_linked_cubin(handle) - nvJitLink.destroy(handle) - - -def test_get_linked_cubin_from_lto(device_functions_ltoir_object, gpu_arch_flag): - filename, data = device_functions_ltoir_object - # device_functions_ltoir_object is a host object containing a fatbin - # containing an LTOIR container, because that is what NVCC produces when - # LTO is requested. So we need to use the OBJECT input type, and the linker - # retrieves the LTO IR from it because we passed the -lto flag. - input_type = InputType.OBJECT.value - handle = nvJitLink.create(gpu_arch_flag, "-lto") - nvJitLink.add_data(handle, input_type, data, filename) - nvJitLink.complete(handle) - cubin = nvJitLink.get_linked_cubin(handle) - nvJitLink.destroy(handle) - - # Just check we got something that looks like an ELF - assert cubin[:4] == b"\x7fELF" - - -def test_get_linked_ptx_from_lto(device_functions_ltoir_object, gpu_arch_flag): - filename, data = device_functions_ltoir_object - # device_functions_ltoir_object is a host object containing a fatbin - # containing an LTOIR container, because that is what NVCC produces when - # LTO is requested. So we need to use the OBJECT input type, and the linker - # retrieves the LTO IR from it because we passed the -lto flag. 
- input_type = InputType.OBJECT.value - handle = nvJitLink.create(gpu_arch_flag, "-lto", "-ptx") - nvJitLink.add_data(handle, input_type, data, filename) - nvJitLink.complete(handle) - nvJitLink.get_linked_ptx(handle) - nvJitLink.destroy(handle) - - -def test_get_linked_ptx_link_not_complete_error( - device_functions_ltoir_object, gpu_arch_flag -): - handle = nvJitLink.create(gpu_arch_flag, "-lto", "-ptx") - filename, data = device_functions_ltoir_object - input_type = InputType.OBJECT.value - nvJitLink.add_data(handle, input_type, data, filename) - with pytest.raises(RuntimeError, match="NVJITLINK_ERROR_INTERNAL error"): - nvJitLink.get_linked_ptx(handle) - nvJitLink.destroy(handle) - - -def test_package_version(): - assert pynvjitlink.__version__ is not None - assert len(str(pynvjitlink.__version__)) > 0 \ No newline at end of file From f7f4fe58beb9d08cebd63632d69c156b54177246 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 19 Oct 2024 05:09:23 +0000 Subject: [PATCH 12/34] regenerate --- .../cuda/bindings/_internal/nvjitlink.pxd | 4 +- .../bindings/_internal/nvjitlink_linux.pyx | 7 +- .../bindings/_internal/nvjitlink_windows.pyx | 13 ++-- .../cuda/bindings/_internal/utils.pxd | 21 ++---- .../cuda/bindings/_internal/utils.pyx | 75 +++++++++---------- cuda_bindings/cuda/bindings/cynvjitlink.pxd | 15 +--- cuda_bindings/cuda/bindings/cynvjitlink.pyx | 14 +--- cuda_bindings/cuda/bindings/nvjitlink.pxd | 12 +-- cuda_bindings/cuda/bindings/nvjitlink.pyx | 30 +++++--- 9 files changed, 84 insertions(+), 107 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink.pxd b/cuda_bindings/cuda/bindings/_internal/nvjitlink.pxd index ac3a9023b..bca8867df 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink.pxd +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink.pxd @@ -1,8 +1,8 @@ # Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. 
# -# SPDX-License-Identifier: Apache-2.0 +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # -# This code was automatically generated across versions from 12.0.76 to 12.6.77. Do not modify it directly. +# This code was automatically generated across versions from 12.0.1 to 12.6.2. Do not modify it directly. from ..cynvjitlink cimport * diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx index ff7a6ca3a..ab3d42be3 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 # -# This code was automatically generated across versions from 12.0.76 to 12.6.77. Do not modify it directly. +# This code was automatically generated across versions from 12.0.1 to 12.6.2. Do not modify it directly. from libc.stdint cimport intptr_t @@ -10,7 +10,6 @@ from .utils cimport get_nvjitlink_dso_version_suffix from .utils import FunctionNotFoundError, NotSupportedError - ############################################################################### # Extern ############################################################################### @@ -55,13 +54,13 @@ cdef void* __nvJitLinkGetInfoLog = NULL cdef void* load_library(const int driver_ver) except* with gil: cdef void* handle for suffix in get_nvjitlink_dso_version_suffix(driver_ver): - so_name = "libnvjitlink.so" + (f".{suffix}" if suffix else suffix) + so_name = "libnvJitLink.so" + (f".{suffix}" if suffix else suffix) handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL) if handle != NULL: break else: err_msg = dlerror() - raise RuntimeError(f'Failed to dlopen libnvjitlink ({err_msg.decode()})') + raise RuntimeError(f'Failed to dlopen libnvJitLink ({err_msg.decode()})') return handle diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx 
b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx index 5cac180f3..feddec3ca 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx @@ -2,19 +2,18 @@ # # SPDX-License-Identifier: Apache-2.0 # -# This code was automatically generated across versions from 12.0.76 to 12.6.77. Do not modify it directly. +# This code was automatically generated across versions from 12.0.1 to 12.6.2. Do not modify it directly. from libc.stdint cimport intptr_t -from utils cimport get_nvjitlink_dso_version_suffix +from .utils cimport get_nvjitlink_dso_version_suffix + import os import site import win32api -from utils import FunctionNotFoundError, NotSupportedError - ############################################################################### # Wrapper init @@ -51,7 +50,7 @@ cdef load_library(const int driver_ver): for suffix in get_nvjitlink_dso_version_suffix(driver_ver): if len(suffix) == 0: continue - dll_name = f"nvjitlink64_{suffix}.dll" + dll_name = f"nvJitLink64_{suffix}.dll" # First check if the DLL has been loaded by 3rd parties try: @@ -63,7 +62,7 @@ cdef load_library(const int driver_ver): # Next, check if DLLs are installed via pip for sp in get_site_packages(): - mod_path = os.path.join(sp, "nvidia", "nvjitlink", "bin") + mod_path = os.path.join(sp, "nvidia", "nvJitLink", "bin") if not os.path.isdir(mod_path): continue os.add_dll_directory(mod_path) @@ -85,7 +84,7 @@ cdef load_library(const int driver_ver): else: break else: - raise RuntimeError('Failed to load nvjitlink') + raise RuntimeError('Failed to load nvJitLink') assert handle != 0 return handle diff --git a/cuda_bindings/cuda/bindings/_internal/utils.pxd b/cuda_bindings/cuda/bindings/_internal/utils.pxd index 225ab3648..be5d4ad61 100644 --- a/cuda_bindings/cuda/bindings/_internal/utils.pxd +++ b/cuda_bindings/cuda/bindings/_internal/utils.pxd @@ -136,13 +136,6 @@ cdef extern from * nogil: void* data() -cdef extern from 
"" nogil: - ctypedef struct cuComplex: - pass - ctypedef struct cuDoubleComplex: - pass - - ctypedef fused ResT: int int32_t @@ -150,10 +143,6 @@ ctypedef fused ResT: ctypedef fused PtrT: - float - double - cuComplex - cuDoubleComplex void @@ -161,10 +150,12 @@ cdef cppclass nested_resource[T]: nullable_unique_ptr[ vector[intptr_t] ] ptrs nullable_unique_ptr[ vector[vector[T]] ] nested_resource_ptr -cdef nullable_unique_ptr[ vector[ResT] ] get_resource_ptr_(object obj, ResT* __unused) -cdef int get_resource_ptr(nullable_unique_ptr[vector[ResT]] &in_out_ptr, object obj, ResT* __unused) except 0 -cdef nullable_unique_ptr[ vector[PtrT*] ] get_resource_ptrs(object obj, PtrT* __unused) -cdef nested_resource[ResT] get_nested_resource_ptr(object obj, ResT* __unused) + +# accepts the output pointer as input to use the return value for exception propagation +cdef int get_resource_ptr(nullable_unique_ptr[vector[ResT]] &in_out_ptr, object obj, ResT* __unused) except 1 +cdef int get_resource_ptrs(nullable_unique_ptr[ vector[PtrT*] ] &in_out_ptr, object obj, PtrT* __unused) except 1 +cdef int get_char_ptrs(nullable_unique_ptr[ vector[char*] ] &in_out_ptr, object obj) except 1 +cdef int get_nested_resource_ptr(nested_resource[ResT] &in_out_ptr, object obj, ResT* __unused) except 1 cdef bint is_nested_sequence(data) cdef void* get_buffer_pointer(buf, Py_ssize_t size, readonly=*) except* diff --git a/cuda_bindings/cuda/bindings/_internal/utils.pyx b/cuda_bindings/cuda/bindings/_internal/utils.pyx index b575ddc03..904e08da0 100644 --- a/cuda_bindings/cuda/bindings/_internal/utils.pyx +++ b/cuda_bindings/cuda/bindings/_internal/utils.pyx @@ -46,51 +46,47 @@ cdef void* get_buffer_pointer(buf, Py_ssize_t size, readonly=True) except*: return bufPtr -# Cython can't infer the overload by return type alone, so we need a dummy -# input argument to help it -cdef nullable_unique_ptr[ vector[ResT] ] get_resource_ptr_(object obj, ResT* __unused): - cdef nullable_unique_ptr[ vector[ResT] ] ptr 
- cdef vector[ResT]* vec +# Cython can't infer the ResT overload when it is wrapped in nullable_unique_ptr, +# so we need a dummy (__unused) input argument to help it +cdef int get_resource_ptr(nullable_unique_ptr[vector[ResT]] &in_out_ptr, object obj, ResT* __unused) except 1: if cpython.PySequence_Check(obj): vec = new vector[ResT](len(obj)) + # set the ownership immediately to avoid leaking the `vec` memory in + # case of exception in the following loop + in_out_ptr.reset(vec, True) for i in range(len(obj)): deref(vec)[i] = obj[i] - ptr.reset(vec, True) else: - ptr.reset(obj, False) - return move(ptr) + in_out_ptr.reset(obj, False) + return 0 + -cdef int get_resource_ptr(nullable_unique_ptr[vector[ResT]] &in_out_ptr, object obj, ResT* __unused) except 0: - cdef vector[ResT]* vec +cdef int get_resource_ptrs(nullable_unique_ptr[ vector[PtrT*] ] &in_out_ptr, object obj, PtrT* __unused) except 1: if cpython.PySequence_Check(obj): - vec = new vector[ResT](len(obj)) - # set the ownership immediately to avoid - # leaking the `vec` memory in case of exception - # (e.g. 
ResT type range overflow) - # when populating the memory in the loop + vec = new vector[PtrT*](len(obj)) + # set the ownership immediately to avoid leaking the `vec` memory in + # case of exception in the following loop in_out_ptr.reset(vec, True) for i in range(len(obj)): - deref(vec)[i] = obj[i] + deref(vec)[i] = (obj[i]) else: - in_out_ptr.reset(obj, False) - return 1 + in_out_ptr.reset(obj, False) + return 0 -cdef nullable_unique_ptr[ vector[PtrT*] ] get_resource_ptrs(object obj, PtrT* __unused): - cdef nullable_unique_ptr[ vector[PtrT*] ] ptr - cdef vector[PtrT*]* vec +cdef int get_char_ptrs(nullable_unique_ptr[ vector[char*] ] &in_out_ptr, object obj) except 1: if cpython.PySequence_Check(obj): - vec = new vector[PtrT*](len(obj)) + vec = new vector[char*](len(obj)) + in_out_ptr.reset(vec, True) for i in range(len(obj)): - deref(vec)[i] = (obj[i]) - ptr.reset(vec, True) + #__TODO__ is there a lifetime difference between this char* and some other ptrT* + deref(vec)[i] = obj[i] else: - ptr.reset(obj, False) - return move(ptr) + in_out_ptr.reset(obj, False) + return 0 -cdef nested_resource[ResT] get_nested_resource_ptr(object obj, ResT* __unused): - cdef nested_resource[ResT] res +cdef int get_nested_resource_ptr(nested_resource[ResT] &in_out_ptr, object obj, ResT* __unused) except 1: cdef nullable_unique_ptr[ vector[intptr_t] ] nested_ptr cdef nullable_unique_ptr[ vector[vector[ResT]] ] nested_res_ptr cdef vector[intptr_t]* nested_vec = NULL @@ -102,26 +98,28 @@ cdef nested_resource[ResT] get_nested_resource_ptr(object obj, ResT* __unused): length = len(obj) nested_res_vec = new vector[vector[ResT]](length) nested_vec = new vector[intptr_t](length) + # set the ownership immediately to avoid leaking memory in case of + # exception in the following loop + nested_res_ptr.reset(nested_res_vec, True) + nested_ptr.reset(nested_vec, True) for i, obj_i in enumerate(obj): deref(nested_res_vec)[i] = obj_i deref(nested_vec)[i] = (deref(nested_res_vec)[i].data()) - 
nested_res_ptr.reset(nested_res_vec, True) - nested_ptr.reset(nested_vec, True) elif cpython.PySequence_Check(obj): length = len(obj) nested_vec = new vector[intptr_t](length) + nested_ptr.reset(nested_vec, True) for i, addr in enumerate(obj): deref(nested_vec)[i] = addr nested_res_ptr.reset(NULL, False) - nested_ptr.reset(nested_vec, True) else: # obj is an int (ResT**) nested_res_ptr.reset(NULL, False) nested_ptr.reset(obj, False) - res.ptrs = move(nested_ptr) - res.nested_resource_ptr = move(nested_res_ptr) - return move(res) + in_out_ptr.ptrs = move(nested_ptr) + in_out_ptr.nested_resource_ptr = move(nested_res_ptr) + return 0 class FunctionNotFoundError(RuntimeError): pass @@ -130,10 +128,7 @@ class NotSupportedError(RuntimeError): pass cdef tuple get_nvjitlink_dso_version_suffix(int driver_ver): - # applicable to both cuBLAS and cuBLASLt - if 11000 <= driver_ver < 12000: - return ('11', '') - elif 12000 <= driver_ver < 13000: - return ('12', '11', '') + if 12000 <= driver_ver < 13000: + return ('12', '') else: - raise NotSupportedError('only CUDA 11/12 driver is supported') \ No newline at end of file + raise NotSupportedError('only CUDA 12 driver is supported') diff --git a/cuda_bindings/cuda/bindings/cynvjitlink.pxd b/cuda_bindings/cuda/bindings/cynvjitlink.pxd index 3f4134706..45c80d3af 100644 --- a/cuda_bindings/cuda/bindings/cynvjitlink.pxd +++ b/cuda_bindings/cuda/bindings/cynvjitlink.pxd @@ -1,19 +1,10 @@ # Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. # -# SPDX-License-Identifier: Apache-2.0 +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # -# This code was automatically generated across versions from 12.0.76 to 12.6.77. Do not modify it directly. +# This code was automatically generated across versions from 12.0.1 to 12.6.2. Do not modify it directly. 
- -<<<<<<< HEAD -<<<<<<< HEAD -from libc.stdint cimport uint32_t -======= from libc.stdint cimport intptr_t, uint32_t ->>>>>>> 5d60eb1 (more changes) -======= -from libc.stdint cimport uint32_t ->>>>>>> 8c4029f (working) ############################################################################### @@ -74,4 +65,4 @@ cdef nvJitLinkResult nvJitLinkGetLinkedPtx(nvJitLinkHandle handle, char* ptx) ex cdef nvJitLinkResult nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t* size) except* nogil cdef nvJitLinkResult nvJitLinkGetErrorLog(nvJitLinkHandle handle, char* log) except* nogil cdef nvJitLinkResult nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t* size) except* nogil -cdef nvJitLinkResult nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) except* nogil \ No newline at end of file +cdef nvJitLinkResult nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) except* nogil diff --git a/cuda_bindings/cuda/bindings/cynvjitlink.pyx b/cuda_bindings/cuda/bindings/cynvjitlink.pyx index c91948f03..3d55097b0 100644 --- a/cuda_bindings/cuda/bindings/cynvjitlink.pyx +++ b/cuda_bindings/cuda/bindings/cynvjitlink.pyx @@ -1,18 +1,10 @@ # Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. # -# SPDX-License-Identifier: Apache-2.0 +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # -# This code was automatically generated across versions from 12.0.76 to 12.6.77. Do not modify it directly. +# This code was automatically generated across versions from 12.0.1 to 12.6.2. Do not modify it directly. 
from ._internal cimport nvjitlink as _nvjitlink -<<<<<<< HEAD -<<<<<<< HEAD -from libc.stdint cimport uint32_t -======= ->>>>>>> 5d60eb1 (more changes) -======= -from libc.stdint cimport uint32_t ->>>>>>> 8c4029f (working) ############################################################################### @@ -68,4 +60,4 @@ cdef nvJitLinkResult nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t* siz cdef nvJitLinkResult nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) except* nogil: - return _nvjitlink._nvJitLinkGetInfoLog(handle, log) \ No newline at end of file + return _nvjitlink._nvJitLinkGetInfoLog(handle, log) diff --git a/cuda_bindings/cuda/bindings/nvjitlink.pxd b/cuda_bindings/cuda/bindings/nvjitlink.pxd index de4d46170..3cec24841 100644 --- a/cuda_bindings/cuda/bindings/nvjitlink.pxd +++ b/cuda_bindings/cuda/bindings/nvjitlink.pxd @@ -1,8 +1,8 @@ # Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. # -# SPDX-License-Identifier: Apache-2.0 +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # -# This code was automatically generated across versions from 12.0.76 to 12.6.77. Do not modify it directly. +# This code was automatically generated across versions from 12.0.1 to 12.6.2. Do not modify it directly. 
from libc.stdint cimport intptr_t, uint32_t @@ -20,15 +20,15 @@ ctypedef nvJitLinkHandle Handle # Enum ############################################################################### -ctypedef nvJitLinkResult _NvJitLinkResult -ctypedef nvJitLinkInputType _NvJitLinkInputType +ctypedef nvJitLinkResult _Result +ctypedef nvJitLinkInputType _InputType ############################################################################### # Functions ############################################################################### -cpdef create(intptr_t handle, uint32_t num_options, intptr_t options) +cpdef intptr_t create(uint32_t num_options, options) except -1 cpdef destroy(intptr_t handle) cpdef add_data(intptr_t handle, int input_type, intptr_t data, size_t size, intptr_t name) cpdef add_file(intptr_t handle, int input_type, intptr_t file_name) @@ -40,4 +40,4 @@ cpdef get_linked_ptx(intptr_t handle, intptr_t ptx) cpdef get_error_log_size(intptr_t handle, intptr_t size) cpdef get_error_log(intptr_t handle, intptr_t log) cpdef get_info_log_size(intptr_t handle, intptr_t size) -cpdef get_info_log(intptr_t handle, intptr_t log) \ No newline at end of file +cpdef get_info_log(intptr_t handle, intptr_t log) diff --git a/cuda_bindings/cuda/bindings/nvjitlink.pyx b/cuda_bindings/cuda/bindings/nvjitlink.pyx index 8c1a89976..9156d970c 100644 --- a/cuda_bindings/cuda/bindings/nvjitlink.pyx +++ b/cuda_bindings/cuda/bindings/nvjitlink.pyx @@ -1,19 +1,23 @@ # Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. # -# SPDX-License-Identifier: Apache-2.0 +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # -# This code was automatically generated across versions from 12.0.76 to 12.6.77. Do not modify it directly. +# This code was automatically generated across versions from 12.0.1 to 12.6.2. Do not modify it directly. 
cimport cython # NOQA +from ._internal.utils cimport (get_resource_ptr, get_nested_resource_ptr, nested_resource, nullable_unique_ptr, + get_buffer_pointer, get_resource_ptrs, get_char_ptrs) + from enum import IntEnum as _IntEnum +from libcpp.vector cimport vector ############################################################################### # Enum ############################################################################### -class NvJitLinkResult(_IntEnum): +class Result(_IntEnum): """See `nvJitLinkResult`.""" SUCCESS = NVJITLINK_SUCCESS ERROR_UNRECOGNIZED_OPTION = NVJITLINK_ERROR_UNRECOGNIZED_OPTION @@ -26,7 +30,7 @@ class NvJitLinkResult(_IntEnum): ERROR_UNRECOGNIZED_INPUT = NVJITLINK_ERROR_UNRECOGNIZED_INPUT ERROR_FINALIZE = NVJITLINK_ERROR_FINALIZE -class NvJitLinkInputType(_IntEnum): +class InputType(_IntEnum): """See `nvJitLinkInputType`.""" INPUT_NONE = NVJITLINK_INPUT_NONE INPUT_CUBIN = NVJITLINK_INPUT_CUBIN @@ -65,20 +69,26 @@ class nvJitLinkError(Exception): @cython.profile(False) -cdef inline void check_status(int status) nogil: +cdef int check_status(int status) except 1 nogil: if status != 0: with gil: raise nvJitLinkError(status) + return status ############################################################################### # Wrapper functions ############################################################################### -cpdef create(intptr_t handle, uint32_t num_options, intptr_t options): +cpdef intptr_t create(uint32_t num_options, options) except -1: + cdef list converted_options = [((s)).encode() for s in options] + cdef nullable_unique_ptr[ vector[char*] ] _options_ + get_char_ptrs(_options_, converted_options) + cdef Handle handle with nogil: - status = nvJitLinkCreate(handle, num_options, options) + status = nvJitLinkCreate(&handle, num_options, (_options_.data())) check_status(status) + return handle cpdef destroy(intptr_t handle): @@ -89,13 +99,13 @@ cpdef destroy(intptr_t handle): cpdef add_data(intptr_t handle, int input_type, 
intptr_t data, size_t size, intptr_t name): with nogil: - status = nvJitLinkAddData(handle, <_NvJitLinkInputType>input_type, data, size, name) + status = nvJitLinkAddData(handle, <_InputType>input_type, data, size, name) check_status(status) cpdef add_file(intptr_t handle, int input_type, intptr_t file_name): with nogil: - status = nvJitLinkAddFile(handle, <_NvJitLinkInputType>input_type, file_name) + status = nvJitLinkAddFile(handle, <_InputType>input_type, file_name) check_status(status) @@ -150,4 +160,4 @@ cpdef get_info_log_size(intptr_t handle, intptr_t size): cpdef get_info_log(intptr_t handle, intptr_t log): with nogil: status = nvJitLinkGetInfoLog(handle, log) - check_status(status) \ No newline at end of file + check_status(status) From 990f4cbe308051df256346c18aafd8e443ab1875 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 19 Oct 2024 05:11:14 +0000 Subject: [PATCH 13/34] clean up a bit --- cuda_bindings/setup.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/cuda_bindings/setup.py b/cuda_bindings/setup.py index 592464487..9d0f6fad3 100644 --- a/cuda_bindings/setup.py +++ b/cuda_bindings/setup.py @@ -60,8 +60,7 @@ 'cuda_egl_interop.h', 'cuda_gl_interop.h', 'cuda_vdpau_interop.h'], - 'nvrtc' : ['nvrtc.h'], - 'nvJitLink' : ['nvJitLink.h']} + 'nvrtc' : ['nvrtc.h']} replace = {' __device_builtin__ ':' ', 'CUDARTAPI ':' ', @@ -98,7 +97,7 @@ parser = CParser(header_paths, cache='./cache_{}'.format(library.split('.')[0]) if PARSER_CACHING else None, replace=replace) - + if library == 'driver': CUDA_VERSION = parser.defs['macros']['CUDA_VERSION'] if 'CUDA_VERSION' in parser.defs['macros'] else 'Unknown' print(f'Found CUDA_VERSION: {CUDA_VERSION}') @@ -121,7 +120,7 @@ def unwrapMembers(found_dict): for key in found_dict: members = [var for var, _, _ in found_dict[key]['members']] found_dict[key]['members'] = members - + unwrapMembers(found_structs) unwrapMembers(found_unions) @@ -198,7 +197,6 @@ def 
prep_extensions(sources): pattern = sources[0] files = glob.glob(pattern) exts = [] - print(include_dirs, library_dirs) for pyx in files: mod_name = pyx.replace(".pyx", "").replace(os.sep, ".").replace("/", ".") exts.append( @@ -215,8 +213,10 @@ def prep_extensions(sources): ) return exts + # new path for the bindings from cybind -def rename_architecture_specific_files(path): +def rename_architecture_specific_files(): + architechture_specific_files_dir = 'cuda/bindings/_internal/' if sys.platform == 'linux': src_files = glob.glob(os.path.join(path, '*_linux.pyx')) elif sys.platform == 'win32': @@ -234,16 +234,20 @@ def rename_architecture_specific_files(path): # atomic move with the destination guaranteed to be overwritten os.replace(f_name, f"./{dst}") dst_files.append(dst) + return dst_files + + +dst_files = rename_architecture_specific_files() + @atexit.register def cleanup_dst_files(): - for dst in architechture_specific_files_dir: + for dst in dst_files: try: os.remove(dst) except FileNotFoundError: pass - -architechture_specific_files_dir = 'cuda/bindings/_internal/' + def do_cythonize(extensions): return cythonize( @@ -254,7 +258,6 @@ def do_cythonize(extensions): ), **extra_cythonize_kwargs) -rename_architecture_specific_files(architechture_specific_files_dir) sources_list = [ # private @@ -307,7 +310,6 @@ def finalize_options(self): find_packages(include=["cuda.cuda", "cuda.cuda.*", "cuda.cuda.bindings", "cuda.cuda.bindings._bindings", "cuda.cuda.bindings._lib", "cuda.cuda.bindings._lib.cyruntime", "cuda.cuda.bindings._internal", "tests"]), ["*.pxd", "*.pyx", "*.py", "*.h", "*.cpp"], ), - cmdclass=cmdclass, zip_safe=False, -) \ No newline at end of file +) From fa87ea80280bada27bc788ddc88e2f68989cd072 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 19 Oct 2024 05:19:40 +0000 Subject: [PATCH 14/34] strip input enumerator prefix --- cuda_bindings/cuda/bindings/nvjitlink.pyx | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git 
a/cuda_bindings/cuda/bindings/nvjitlink.pyx b/cuda_bindings/cuda/bindings/nvjitlink.pyx index 9156d970c..bfc6acb22 100644 --- a/cuda_bindings/cuda/bindings/nvjitlink.pyx +++ b/cuda_bindings/cuda/bindings/nvjitlink.pyx @@ -32,15 +32,15 @@ class Result(_IntEnum): class InputType(_IntEnum): """See `nvJitLinkInputType`.""" - INPUT_NONE = NVJITLINK_INPUT_NONE - INPUT_CUBIN = NVJITLINK_INPUT_CUBIN - INPUT_PTX = NVJITLINK_INPUT_PTX - INPUT_LTOIR = NVJITLINK_INPUT_LTOIR - INPUT_FATBIN = NVJITLINK_INPUT_FATBIN - INPUT_OBJECT = NVJITLINK_INPUT_OBJECT - INPUT_LIBRARY = NVJITLINK_INPUT_LIBRARY - INPUT_INDEX = NVJITLINK_INPUT_INDEX - INPUT_ANY = NVJITLINK_INPUT_ANY + NONE = NVJITLINK_INPUT_NONE + CUBIN = NVJITLINK_INPUT_CUBIN + PTX = NVJITLINK_INPUT_PTX + LTOIR = NVJITLINK_INPUT_LTOIR + FATBIN = NVJITLINK_INPUT_FATBIN + OBJECT = NVJITLINK_INPUT_OBJECT + LIBRARY = NVJITLINK_INPUT_LIBRARY + INDEX = NVJITLINK_INPUT_INDEX + ANY = NVJITLINK_INPUT_ANY ############################################################################### From 5f13031103608b09b1a49fa5a63800076f4252a0 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 19 Oct 2024 05:50:38 +0000 Subject: [PATCH 15/34] hand-write nvJitLinkDestroy lowpp binding; turn on doxygen --- cuda_bindings/cuda/bindings/nvjitlink.pxd | 1 - cuda_bindings/cuda/bindings/nvjitlink.pyx | 117 +++++++++++++++++++++- 2 files changed, 112 insertions(+), 6 deletions(-) diff --git a/cuda_bindings/cuda/bindings/nvjitlink.pxd b/cuda_bindings/cuda/bindings/nvjitlink.pxd index 3cec24841..689820721 100644 --- a/cuda_bindings/cuda/bindings/nvjitlink.pxd +++ b/cuda_bindings/cuda/bindings/nvjitlink.pxd @@ -29,7 +29,6 @@ ctypedef nvJitLinkInputType _InputType ############################################################################### cpdef intptr_t create(uint32_t num_options, options) except -1 -cpdef destroy(intptr_t handle) cpdef add_data(intptr_t handle, int input_type, intptr_t data, size_t size, intptr_t name) cpdef add_file(intptr_t handle, int 
input_type, intptr_t file_name) cpdef complete(intptr_t handle) diff --git a/cuda_bindings/cuda/bindings/nvjitlink.pyx b/cuda_bindings/cuda/bindings/nvjitlink.pyx index bfc6acb22..dedf49ea1 100644 --- a/cuda_bindings/cuda/bindings/nvjitlink.pyx +++ b/cuda_bindings/cuda/bindings/nvjitlink.pyx @@ -80,7 +80,29 @@ cdef int check_status(int status) except 1 nogil: # Wrapper functions ############################################################################### +cpdef destroy(intptr_t handle): + cdef Handle h = handle + with nogil: + status = nvJitLinkDestroy(&h) + check_status(status) + + cpdef intptr_t create(uint32_t num_options, options) except -1: + """nvJitLinkCreate creates an instance of nvJitLinkHandle with the given input options, and sets the output parameter ``handle``. + + Args: + num_options (uint32_t): Number of options passed. + options (object): Array of size ``num_options`` of option strings. It can be: + + - an :class:`int` as the pointer address to the array, or + - a Python sequence of ``char*``. + + + Returns: + intptr_t: Address of nvJitLink handle. + + .. seealso:: `nvJitLinkCreate` + """ cdef list converted_options = [((s)).encode() for s in options] cdef nullable_unique_ptr[ vector[char*] ] _options_ get_char_ptrs(_options_, converted_options) @@ -91,73 +113,158 @@ cpdef intptr_t create(uint32_t num_options, options) except -1: return handle -cpdef destroy(intptr_t handle): - with nogil: - status = nvJitLinkDestroy(handle) - check_status(status) +cpdef add_data(intptr_t handle, int input_type, intptr_t data, size_t size, intptr_t name): + """nvJitLinkAddData adds data image to the link. + Args: + handle (intptr_t): nvJitLink handle. + input_type (InputType): kind of input. + data (intptr_t): pointer to data image in memory. + size (size_t): size of the data. + name (intptr_t): name of input object. -cpdef add_data(intptr_t handle, int input_type, intptr_t data, size_t size, intptr_t name): + .. 
seealso:: `nvJitLinkAddData` + """ with nogil: status = nvJitLinkAddData(handle, <_InputType>input_type, data, size, name) check_status(status) cpdef add_file(intptr_t handle, int input_type, intptr_t file_name): + """nvJitLinkAddFile reads data from file and links it in. + + Args: + handle (intptr_t): nvJitLink handle. + input_type (InputType): kind of input. + file_name (intptr_t): name of file. + + .. seealso:: `nvJitLinkAddFile` + """ with nogil: status = nvJitLinkAddFile(handle, <_InputType>input_type, file_name) check_status(status) cpdef complete(intptr_t handle): + """nvJitLinkComplete does the actual link. + + Args: + handle (intptr_t): nvJitLink handle. + + .. seealso:: `nvJitLinkComplete` + """ with nogil: status = nvJitLinkComplete(handle) check_status(status) cpdef get_linked_cubin_size(intptr_t handle, intptr_t size): + """nvJitLinkGetLinkedCubinSize gets the size of the linked cubin. + + Args: + handle (intptr_t): nvJitLink handle. + size (intptr_t): Size of the linked cubin. + + .. seealso:: `nvJitLinkGetLinkedCubinSize` + """ with nogil: status = nvJitLinkGetLinkedCubinSize(handle, size) check_status(status) cpdef get_linked_cubin(intptr_t handle, intptr_t cubin): + """nvJitLinkGetLinkedCubin gets the linked cubin. + + Args: + handle (intptr_t): nvJitLink handle. + cubin (intptr_t): The linked cubin. + + .. seealso:: `nvJitLinkGetLinkedCubin` + """ with nogil: status = nvJitLinkGetLinkedCubin(handle, cubin) check_status(status) cpdef get_linked_ptx_size(intptr_t handle, intptr_t size): + """nvJitLinkGetLinkedPtxSize gets the size of the linked ptx. + + Args: + handle (intptr_t): nvJitLink handle. + size (intptr_t): Size of the linked PTX. + + .. seealso:: `nvJitLinkGetLinkedPtxSize` + """ with nogil: status = nvJitLinkGetLinkedPtxSize(handle, size) check_status(status) cpdef get_linked_ptx(intptr_t handle, intptr_t ptx): + """nvJitLinkGetLinkedPtx gets the linked ptx. + + Args: + handle (intptr_t): nvJitLink handle. 
+ ptx (intptr_t): The linked PTX. + + .. seealso:: `nvJitLinkGetLinkedPtx` + """ with nogil: status = nvJitLinkGetLinkedPtx(handle, ptx) check_status(status) cpdef get_error_log_size(intptr_t handle, intptr_t size): + """nvJitLinkGetErrorLogSize gets the size of the error log. + + Args: + handle (intptr_t): nvJitLink handle. + size (intptr_t): Size of the error log. + + .. seealso:: `nvJitLinkGetErrorLogSize` + """ with nogil: status = nvJitLinkGetErrorLogSize(handle, size) check_status(status) cpdef get_error_log(intptr_t handle, intptr_t log): + """nvJitLinkGetErrorLog puts any error messages in the log. + + Args: + handle (intptr_t): nvJitLink handle. + log (intptr_t): The error log. + + .. seealso:: `nvJitLinkGetErrorLog` + """ with nogil: status = nvJitLinkGetErrorLog(handle, log) check_status(status) cpdef get_info_log_size(intptr_t handle, intptr_t size): + """nvJitLinkGetInfoLogSize gets the size of the info log. + + Args: + handle (intptr_t): nvJitLink handle. + size (intptr_t): Size of the info log. + + .. seealso:: `nvJitLinkGetInfoLogSize` + """ with nogil: status = nvJitLinkGetInfoLogSize(handle, size) check_status(status) cpdef get_info_log(intptr_t handle, intptr_t log): + """nvJitLinkGetInfoLog puts any info messages in the log. + + Args: + handle (intptr_t): nvJitLink handle. + log (intptr_t): The info log. + + .. 
seealso:: `nvJitLinkGetInfoLog` + """ with nogil: status = nvJitLinkGetInfoLog(handle, log) check_status(status) From f167588d6e93d02bbad0aca695f7e8a0f3256a02 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 19 Oct 2024 21:01:39 +0000 Subject: [PATCH 16/34] switch from NSTR to improved NSEQ; purge NSTR --- cuda_bindings/cuda/bindings/_internal/utils.pxd | 1 + cuda_bindings/cuda/bindings/_internal/utils.pyx | 7 ++++++- cuda_bindings/cuda/bindings/nvjitlink.pyx | 13 +++++++------ 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/utils.pxd b/cuda_bindings/cuda/bindings/_internal/utils.pxd index be5d4ad61..75ec69dfb 100644 --- a/cuda_bindings/cuda/bindings/_internal/utils.pxd +++ b/cuda_bindings/cuda/bindings/_internal/utils.pxd @@ -140,6 +140,7 @@ ctypedef fused ResT: int int32_t int64_t + char ctypedef fused PtrT: diff --git a/cuda_bindings/cuda/bindings/_internal/utils.pyx b/cuda_bindings/cuda/bindings/_internal/utils.pyx index 904e08da0..a0b36bbd2 100644 --- a/cuda_bindings/cuda/bindings/_internal/utils.pyx +++ b/cuda_bindings/cuda/bindings/_internal/utils.pyx @@ -103,7 +103,12 @@ cdef int get_nested_resource_ptr(nested_resource[ResT] &in_out_ptr, object obj, nested_res_ptr.reset(nested_res_vec, True) nested_ptr.reset(nested_vec, True) for i, obj_i in enumerate(obj): - deref(nested_res_vec)[i] = obj_i + if ResT is char: + obj_i_bytes = ((obj_i)).encode() + obj_i_ptr = (obj_i_bytes) + deref(nested_res_vec)[i].assign(obj_i_ptr, obj_i_ptr + length) + else: + deref(nested_res_vec)[i] = obj_i deref(nested_vec)[i] = (deref(nested_res_vec)[i].data()) elif cpython.PySequence_Check(obj): length = len(obj) diff --git a/cuda_bindings/cuda/bindings/nvjitlink.pyx b/cuda_bindings/cuda/bindings/nvjitlink.pyx index dedf49ea1..031e1f86e 100644 --- a/cuda_bindings/cuda/bindings/nvjitlink.pyx +++ b/cuda_bindings/cuda/bindings/nvjitlink.pyx @@ -94,8 +94,10 @@ cpdef intptr_t create(uint32_t num_options, options) except -1: 
num_options (uint32_t): Number of options passed. options (object): Array of size ``num_options`` of option strings. It can be: - - an :class:`int` as the pointer address to the array, or - - a Python sequence of ``char*``. + - an :class:`int` as the pointer address to the nested sequence, or + - a Python sequence of :class:`int`\s, each of which is a pointer address + to a valid sequence of 'char', or + - a nested Python sequence of ``str``. Returns: @@ -103,12 +105,11 @@ cpdef intptr_t create(uint32_t num_options, options) except -1: .. seealso:: `nvJitLinkCreate` """ - cdef list converted_options = [((s)).encode() for s in options] - cdef nullable_unique_ptr[ vector[char*] ] _options_ - get_char_ptrs(_options_, converted_options) + cdef nested_resource[ char ] _options_ + get_nested_resource_ptr[char](_options_, options, NULL) cdef Handle handle with nogil: - status = nvJitLinkCreate(&handle, num_options, (_options_.data())) + status = nvJitLinkCreate(&handle, num_options, (_options_.ptrs.data())) check_status(status) return handle From ea3d2262837b8c0361dce86f64995f17ee7938e9 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 19 Oct 2024 21:58:48 +0000 Subject: [PATCH 17/34] fix inner vector size & clean-up --- cuda_bindings/cuda/bindings/_internal/utils.pxd | 1 - cuda_bindings/cuda/bindings/_internal/utils.pyx | 17 ++++------------- cuda_bindings/cuda/bindings/nvjitlink.pyx | 2 +- 3 files changed, 5 insertions(+), 15 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/utils.pxd b/cuda_bindings/cuda/bindings/_internal/utils.pxd index 75ec69dfb..2b45ced3b 100644 --- a/cuda_bindings/cuda/bindings/_internal/utils.pxd +++ b/cuda_bindings/cuda/bindings/_internal/utils.pxd @@ -155,7 +155,6 @@ cdef cppclass nested_resource[T]: # accepts the output pointer as input to use the return value for exception propagation cdef int get_resource_ptr(nullable_unique_ptr[vector[ResT]] &in_out_ptr, object obj, ResT* __unused) except 1 cdef int 
get_resource_ptrs(nullable_unique_ptr[ vector[PtrT*] ] &in_out_ptr, object obj, PtrT* __unused) except 1 -cdef int get_char_ptrs(nullable_unique_ptr[ vector[char*] ] &in_out_ptr, object obj) except 1 cdef int get_nested_resource_ptr(nested_resource[ResT] &in_out_ptr, object obj, ResT* __unused) except 1 cdef bint is_nested_sequence(data) diff --git a/cuda_bindings/cuda/bindings/_internal/utils.pyx b/cuda_bindings/cuda/bindings/_internal/utils.pyx index a0b36bbd2..9c5626155 100644 --- a/cuda_bindings/cuda/bindings/_internal/utils.pyx +++ b/cuda_bindings/cuda/bindings/_internal/utils.pyx @@ -74,18 +74,6 @@ cdef int get_resource_ptrs(nullable_unique_ptr[ vector[PtrT*] ] &in_out_ptr, obj return 0 -cdef int get_char_ptrs(nullable_unique_ptr[ vector[char*] ] &in_out_ptr, object obj) except 1: - if cpython.PySequence_Check(obj): - vec = new vector[char*](len(obj)) - in_out_ptr.reset(vec, True) - for i in range(len(obj)): - #__TODO__ is there a lifetime difference between this char* and some other ptrT* - deref(vec)[i] = obj[i] - else: - in_out_ptr.reset(obj, False) - return 0 - - cdef int get_nested_resource_ptr(nested_resource[ResT] &in_out_ptr, object obj, ResT* __unused) except 1: cdef nullable_unique_ptr[ vector[intptr_t] ] nested_ptr cdef nullable_unique_ptr[ vector[vector[ResT]] ] nested_res_ptr @@ -105,8 +93,11 @@ cdef int get_nested_resource_ptr(nested_resource[ResT] &in_out_ptr, object obj, for i, obj_i in enumerate(obj): if ResT is char: obj_i_bytes = ((obj_i)).encode() + str_len = (len(obj_i_bytes)) + 1 # including null termination + deref(nested_res_vec)[i].resize(str_len) obj_i_ptr = (obj_i_bytes) - deref(nested_res_vec)[i].assign(obj_i_ptr, obj_i_ptr + length) + # cast to size_t explicitly to work around a potentially Cython bug + deref(nested_res_vec)[i].assign(obj_i_ptr, obj_i_ptr + str_len) else: deref(nested_res_vec)[i] = obj_i deref(nested_vec)[i] = (deref(nested_res_vec)[i].data()) diff --git a/cuda_bindings/cuda/bindings/nvjitlink.pyx 
b/cuda_bindings/cuda/bindings/nvjitlink.pyx index 031e1f86e..7fc401500 100644 --- a/cuda_bindings/cuda/bindings/nvjitlink.pyx +++ b/cuda_bindings/cuda/bindings/nvjitlink.pyx @@ -7,7 +7,7 @@ cimport cython # NOQA from ._internal.utils cimport (get_resource_ptr, get_nested_resource_ptr, nested_resource, nullable_unique_ptr, - get_buffer_pointer, get_resource_ptrs, get_char_ptrs) + get_buffer_pointer, get_resource_ptrs) from enum import IntEnum as _IntEnum from libcpp.vector cimport vector From c1d21daa76b3fef0987f3fda41011cdca06b0d18 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 19 Oct 2024 23:07:32 +0000 Subject: [PATCH 18/34] use autogen'd error code --- cuda_bindings/cuda/bindings/nvjitlink.pyx | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/cuda_bindings/cuda/bindings/nvjitlink.pyx b/cuda_bindings/cuda/bindings/nvjitlink.pyx index 7fc401500..450323810 100644 --- a/cuda_bindings/cuda/bindings/nvjitlink.pyx +++ b/cuda_bindings/cuda/bindings/nvjitlink.pyx @@ -47,21 +47,12 @@ class InputType(_IntEnum): # Error handling ############################################################################### -cdef dict STATUS={ - NVJITLINK_SUCCESS : 'NVJITLINK_SUCCESS', - NVJITLINK_ERROR_UNRECOGNIZED_OPTION : 'NVJITLINK_ERROR_UNRECOGNIZED_OPTION', - NVJITLINK_ERROR_MISSING_ARCH : 'NVJITLINK_ERROR_MISSING_ARCH', - NVJITLINK_ERROR_INVALID_INPUT : 'NVJITLINK_ERROR_INVALID_INPUT', - NVJITLINK_ERROR_PTX_COMPILE : 'NVJITLINK_ERROR_PTX_COMPILE', - NVJITLINK_ERROR_NVVM_COMPILE : 'NVJITLINK_ERROR_NVVM_COMPILE', - NVJITLINK_ERROR_INTERNAL : 'NVJITLINK_ERROR_INTERNAL' -} - class nvJitLinkError(Exception): def __init__(self, status): self.status = status - cdef str err = STATUS[status] + s = Result(status) + cdef str err = f"{s.name} ({s.value})" super(nvJitLinkError, self).__init__(err) def __reduce__(self): From 0b836e00d30cc3631c39d9cb78671eab37120b83 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 19 Oct 2024 23:42:08 +0000 Subject: [PATCH 
19/34] fix input arg conversions & output args; add nvJitLinkVersion --- .../cuda/bindings/_internal/nvjitlink.pxd | 1 + .../bindings/_internal/nvjitlink_linux.pyx | 21 +++++ .../bindings/_internal/nvjitlink_windows.pyx | 20 +++++ cuda_bindings/cuda/bindings/cynvjitlink.pxd | 1 + cuda_bindings/cuda/bindings/cynvjitlink.pyx | 4 + cuda_bindings/cuda/bindings/nvjitlink.pxd | 13 +-- cuda_bindings/cuda/bindings/nvjitlink.pyx | 79 ++++++++++++++----- 7 files changed, 115 insertions(+), 24 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink.pxd b/cuda_bindings/cuda/bindings/_internal/nvjitlink.pxd index bca8867df..5f717d4d8 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink.pxd +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink.pxd @@ -24,3 +24,4 @@ cdef nvJitLinkResult _nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t* s cdef nvJitLinkResult _nvJitLinkGetErrorLog(nvJitLinkHandle handle, char* log) except* nogil cdef nvJitLinkResult _nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t* size) except* nogil cdef nvJitLinkResult _nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) except* nogil +cdef nvJitLinkResult _nvJitLinkVersion(unsigned int* major, unsigned int* minor) except* nogil diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx index ab3d42be3..146832f0e 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx @@ -49,6 +49,7 @@ cdef void* __nvJitLinkGetErrorLogSize = NULL cdef void* __nvJitLinkGetErrorLog = NULL cdef void* __nvJitLinkGetInfoLogSize = NULL cdef void* __nvJitLinkGetInfoLog = NULL +cdef void* __nvJitLinkVersion = NULL cdef void* load_library(const int driver_ver) except* with gil: @@ -181,6 +182,13 @@ cdef int _check_or_init_nvjitlink() except -1 nogil: if handle == NULL: handle = load_library(driver_ver) __nvJitLinkGetInfoLog = dlsym(handle, 
'nvJitLinkGetInfoLog') + + global __nvJitLinkVersion + __nvJitLinkVersion = dlsym(RTLD_DEFAULT, 'nvJitLinkVersion') + if __nvJitLinkVersion == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvJitLinkVersion = dlsym(handle, 'nvJitLinkVersion') __py_nvjitlink_init = True return 0 @@ -235,6 +243,9 @@ cpdef dict _inspect_function_pointers(): global __nvJitLinkGetInfoLog data["__nvJitLinkGetInfoLog"] = __nvJitLinkGetInfoLog + + global __nvJitLinkVersion + data["__nvJitLinkVersion"] = __nvJitLinkVersion func_ptrs = data return data @@ -379,3 +390,13 @@ cdef nvJitLinkResult _nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) exc raise FunctionNotFoundError("function nvJitLinkGetInfoLog is not found") return (__nvJitLinkGetInfoLog)( handle, log) + + +cdef nvJitLinkResult _nvJitLinkVersion(unsigned int* major, unsigned int* minor) except* nogil: + global __nvJitLinkVersion + _check_or_init_nvjitlink() + if __nvJitLinkVersion == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkVersion is not found") + return (__nvJitLinkVersion)( + major, minor) diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx index feddec3ca..a6a378f86 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx @@ -38,6 +38,7 @@ cdef void* __nvJitLinkGetErrorLogSize = NULL cdef void* __nvJitLinkGetErrorLog = NULL cdef void* __nvJitLinkGetInfoLogSize = NULL cdef void* __nvJitLinkGetInfoLog = NULL +cdef void* __nvJitLinkVersion = NULL cdef inline list get_site_packages(): @@ -192,6 +193,12 @@ cdef int _check_or_init_nvjitlink() except -1 nogil: __nvJitLinkGetInfoLog = win32api.GetProcAddress(handle, 'nvJitLinkGetInfoLog') except: pass + + global __nvJitLinkVersion + try: + __nvJitLinkVersion = win32api.GetProcAddress(handle, 'nvJitLinkVersion') + except: + pass __py_nvjitlink_init = True return 0 @@ -246,6 
+253,9 @@ cpdef dict _inspect_function_pointers(): global __nvJitLinkGetInfoLog data["__nvJitLinkGetInfoLog"] = __nvJitLinkGetInfoLog + + global __nvJitLinkVersion + data["__nvJitLinkVersion"] = __nvJitLinkVersion func_ptrs = data return data @@ -390,3 +400,13 @@ cdef nvJitLinkResult _nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) exc raise FunctionNotFoundError("function nvJitLinkGetInfoLog is not found") return (__nvJitLinkGetInfoLog)( handle, log) + + +cdef nvJitLinkResult _nvJitLinkVersion(unsigned int* major, unsigned int* minor) except* nogil: + global __nvJitLinkVersion + _check_or_init_nvjitlink() + if __nvJitLinkVersion == NULL: + with gil: + raise FunctionNotFoundError("function nvJitLinkVersion is not found") + return (__nvJitLinkVersion)( + major, minor) diff --git a/cuda_bindings/cuda/bindings/cynvjitlink.pxd b/cuda_bindings/cuda/bindings/cynvjitlink.pxd index 45c80d3af..6c2194736 100644 --- a/cuda_bindings/cuda/bindings/cynvjitlink.pxd +++ b/cuda_bindings/cuda/bindings/cynvjitlink.pxd @@ -66,3 +66,4 @@ cdef nvJitLinkResult nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t* si cdef nvJitLinkResult nvJitLinkGetErrorLog(nvJitLinkHandle handle, char* log) except* nogil cdef nvJitLinkResult nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t* size) except* nogil cdef nvJitLinkResult nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) except* nogil +cdef nvJitLinkResult nvJitLinkVersion(unsigned int* major, unsigned int* minor) except* nogil diff --git a/cuda_bindings/cuda/bindings/cynvjitlink.pyx b/cuda_bindings/cuda/bindings/cynvjitlink.pyx index 3d55097b0..2a8695434 100644 --- a/cuda_bindings/cuda/bindings/cynvjitlink.pyx +++ b/cuda_bindings/cuda/bindings/cynvjitlink.pyx @@ -61,3 +61,7 @@ cdef nvJitLinkResult nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t* siz cdef nvJitLinkResult nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) except* nogil: return _nvjitlink._nvJitLinkGetInfoLog(handle, log) + + +cdef nvJitLinkResult 
nvJitLinkVersion(unsigned int* major, unsigned int* minor) except* nogil: + return _nvjitlink._nvJitLinkVersion(major, minor) diff --git a/cuda_bindings/cuda/bindings/nvjitlink.pxd b/cuda_bindings/cuda/bindings/nvjitlink.pxd index 689820721..2b8841cd5 100644 --- a/cuda_bindings/cuda/bindings/nvjitlink.pxd +++ b/cuda_bindings/cuda/bindings/nvjitlink.pxd @@ -29,14 +29,15 @@ ctypedef nvJitLinkInputType _InputType ############################################################################### cpdef intptr_t create(uint32_t num_options, options) except -1 -cpdef add_data(intptr_t handle, int input_type, intptr_t data, size_t size, intptr_t name) -cpdef add_file(intptr_t handle, int input_type, intptr_t file_name) +cpdef add_data(intptr_t handle, int input_type, intptr_t data, size_t size, name) +cpdef add_file(intptr_t handle, int input_type, file_name) cpdef complete(intptr_t handle) -cpdef get_linked_cubin_size(intptr_t handle, intptr_t size) +cpdef size_t get_linked_cubin_size(intptr_t handle) except? 0 cpdef get_linked_cubin(intptr_t handle, intptr_t cubin) -cpdef get_linked_ptx_size(intptr_t handle, intptr_t size) +cpdef size_t get_linked_ptx_size(intptr_t handle) except? 0 cpdef get_linked_ptx(intptr_t handle, intptr_t ptx) -cpdef get_error_log_size(intptr_t handle, intptr_t size) +cpdef size_t get_error_log_size(intptr_t handle) except? 0 cpdef get_error_log(intptr_t handle, intptr_t log) -cpdef get_info_log_size(intptr_t handle, intptr_t size) +cpdef size_t get_info_log_size(intptr_t handle) except? 
0 cpdef get_info_log(intptr_t handle, intptr_t log) +cpdef tuple version() diff --git a/cuda_bindings/cuda/bindings/nvjitlink.pyx b/cuda_bindings/cuda/bindings/nvjitlink.pyx index 450323810..b75596f6c 100644 --- a/cuda_bindings/cuda/bindings/nvjitlink.pyx +++ b/cuda_bindings/cuda/bindings/nvjitlink.pyx @@ -105,7 +105,7 @@ cpdef intptr_t create(uint32_t num_options, options) except -1: return handle -cpdef add_data(intptr_t handle, int input_type, intptr_t data, size_t size, intptr_t name): +cpdef add_data(intptr_t handle, int input_type, intptr_t data, size_t size, name): """nvJitLinkAddData adds data image to the link. Args: @@ -113,27 +113,35 @@ cpdef add_data(intptr_t handle, int input_type, intptr_t data, size_t size, intp input_type (InputType): kind of input. data (intptr_t): pointer to data image in memory. size (size_t): size of the data. - name (intptr_t): name of input object. + name (str): name of input object. .. seealso:: `nvJitLinkAddData` """ + if not isinstance(name, str): + raise TypeError("name must be a Python str") + cdef bytes _temp_name_ = (name).encode() + cdef char* _name_ = _temp_name_ with nogil: - status = nvJitLinkAddData(handle, <_InputType>input_type, data, size, name) + status = nvJitLinkAddData(handle, <_InputType>input_type, data, size, _name_) check_status(status) -cpdef add_file(intptr_t handle, int input_type, intptr_t file_name): +cpdef add_file(intptr_t handle, int input_type, file_name): """nvJitLinkAddFile reads data from file and links it in. Args: handle (intptr_t): nvJitLink handle. input_type (InputType): kind of input. - file_name (intptr_t): name of file. + file_name (str): name of file. .. 
seealso:: `nvJitLinkAddFile` """ + if not isinstance(file_name, str): + raise TypeError("file_name must be a Python str") + cdef bytes _temp_file_name_ = (file_name).encode() + cdef char* _file_name_ = _temp_file_name_ with nogil: - status = nvJitLinkAddFile(handle, <_InputType>input_type, file_name) + status = nvJitLinkAddFile(handle, <_InputType>input_type, _file_name_) check_status(status) @@ -150,18 +158,22 @@ cpdef complete(intptr_t handle): check_status(status) -cpdef get_linked_cubin_size(intptr_t handle, intptr_t size): +cpdef size_t get_linked_cubin_size(intptr_t handle) except? 0: """nvJitLinkGetLinkedCubinSize gets the size of the linked cubin. Args: handle (intptr_t): nvJitLink handle. - size (intptr_t): Size of the linked cubin. + + Returns: + size_t: Size of the linked cubin. .. seealso:: `nvJitLinkGetLinkedCubinSize` """ + cdef size_t size with nogil: - status = nvJitLinkGetLinkedCubinSize(handle, size) + status = nvJitLinkGetLinkedCubinSize(handle, &size) check_status(status) + return size cpdef get_linked_cubin(intptr_t handle, intptr_t cubin): @@ -178,18 +190,22 @@ cpdef get_linked_cubin(intptr_t handle, intptr_t cubin): check_status(status) -cpdef get_linked_ptx_size(intptr_t handle, intptr_t size): +cpdef size_t get_linked_ptx_size(intptr_t handle) except? 0: """nvJitLinkGetLinkedPtxSize gets the size of the linked ptx. Args: handle (intptr_t): nvJitLink handle. - size (intptr_t): Size of the linked PTX. + + Returns: + size_t: Size of the linked PTX. .. seealso:: `nvJitLinkGetLinkedPtxSize` """ + cdef size_t size with nogil: - status = nvJitLinkGetLinkedPtxSize(handle, size) + status = nvJitLinkGetLinkedPtxSize(handle, &size) check_status(status) + return size cpdef get_linked_ptx(intptr_t handle, intptr_t ptx): @@ -206,18 +222,22 @@ cpdef get_linked_ptx(intptr_t handle, intptr_t ptx): check_status(status) -cpdef get_error_log_size(intptr_t handle, intptr_t size): +cpdef size_t get_error_log_size(intptr_t handle) except? 
0: """nvJitLinkGetErrorLogSize gets the size of the error log. Args: handle (intptr_t): nvJitLink handle. - size (intptr_t): Size of the error log. + + Returns: + size_t: Size of the error log. .. seealso:: `nvJitLinkGetErrorLogSize` """ + cdef size_t size with nogil: - status = nvJitLinkGetErrorLogSize(handle, size) + status = nvJitLinkGetErrorLogSize(handle, &size) check_status(status) + return size cpdef get_error_log(intptr_t handle, intptr_t log): @@ -234,18 +254,22 @@ cpdef get_error_log(intptr_t handle, intptr_t log): check_status(status) -cpdef get_info_log_size(intptr_t handle, intptr_t size): +cpdef size_t get_info_log_size(intptr_t handle) except? 0: """nvJitLinkGetInfoLogSize gets the size of the info log. Args: handle (intptr_t): nvJitLink handle. - size (intptr_t): Size of the info log. + + Returns: + size_t: Size of the info log. .. seealso:: `nvJitLinkGetInfoLogSize` """ + cdef size_t size with nogil: - status = nvJitLinkGetInfoLogSize(handle, size) + status = nvJitLinkGetInfoLogSize(handle, &size) check_status(status) + return size cpdef get_info_log(intptr_t handle, intptr_t log): @@ -260,3 +284,22 @@ cpdef get_info_log(intptr_t handle, intptr_t log): with nogil: status = nvJitLinkGetInfoLog(handle, log) check_status(status) + + +cpdef tuple version(): + """nvJitLinkVersion returns the current version of nvJitLink. + + Returns: + A 2-tuple containing: + + - unsigned int: The major version. + - unsigned int: The minor version. + + .. 
seealso:: `nvJitLinkVersion` + """ + cdef unsigned int major + cdef unsigned int minor + with nogil: + status = nvJitLinkVersion(&major, &minor) + check_status(status) + return (major, minor) From fab638245c06d79bc18defc2dce6edda85196d55 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 20 Oct 2024 19:16:26 +0000 Subject: [PATCH 20/34] start fixing tests --- cuda_bindings/tests/test_nvjitlink.py | 51 +++++++++++++-------------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/cuda_bindings/tests/test_nvjitlink.py b/cuda_bindings/tests/test_nvjitlink.py index 37129e4a2..ec475b04a 100644 --- a/cuda_bindings/tests/test_nvjitlink.py +++ b/cuda_bindings/tests/test_nvjitlink.py @@ -1,23 +1,21 @@ -import pytest -from cuda.bindings import nvjitlink +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -dir(nvjitlink) +import pytest -def test_create_no_arch_error(): - # nvjitlink expects at least the architecture to be specified. 
- with pytest.raises(RuntimeError, match="NVJITLINK_ERROR_MISSING_ARCH error"): - nvjitlink.create() +from cuda.bindings import nvjitlink def test_invalid_arch_error(): # sm_XX is not a valid architecture - with pytest.raises(RuntimeError, match="NVJITLINK_ERROR_UNRECOGNIZED_OPTION error"): - nvjitlink.create("-arch=sm_XX") + with pytest.raises(nvjitlink.nvJitLinkError, match="ERROR_UNRECOGNIZED_OPTION"): + nvjitlink.create(1, ["-arch=sm_XX"]) def test_unrecognized_option_error(): - with pytest.raises(RuntimeError, match="NVJITLINK_ERROR_UNRECOGNIZED_OPTION error"): - nvjitlink.create("-fictitious_option") + with pytest.raises(nvjitlink.nvJitLinkError, match="ERROR_UNRECOGNIZED_OPTION"): + nvjitlink.create(1, ["-fictitious_option"]) def test_invalid_option_type_error(): @@ -41,17 +39,17 @@ def test_complete_empty(): "input_file,input_type", [ ("device_functions_cubin", nvjitlink.InputType.CUBIN), - ("device_functions_fatbin", InputType.FATBIN), - ("device_functions_ptx", InputType.PTX), - ("device_functions_object", InputType.OBJECT), - ("device_functions_archive", InputType.LIBRARY), + ("device_functions_fatbin", nvjitlink.InputType.FATBIN), + ("device_functions_ptx", nvjitlink.InputType.PTX), + ("device_functions_object", nvjitlink.InputType.OBJECT), + ("device_functions_archive", nvjitlink.InputType.LIBRARY), ], ) def test_add_file(input_file, input_type, gpu_arch_flag, request): filename, data = request.getfixturevalue(input_file) handle = nvjitlink.create(gpu_arch_flag) - nvjitlink.add_data(handle, input_type.value, data, filename) + nvjitlink.add_data(handle, input_type, data, filename) nvjitlink.destroy(handle) @@ -62,14 +60,14 @@ def test_add_file_lto(device_functions_ltoir_object, gpu_arch_flag): filename, data = device_functions_ltoir_object handle = nvjitlink.create(gpu_arch_flag, "-lto") - nvjitlink.add_data(handle, InputType.OBJECT.value, data, filename) + nvjitlink.add_data(handle, nvjitlink.InputType.OBJECT, data, filename) nvjitlink.destroy(handle) 
def test_get_error_log(undefined_extern_cubin, gpu_arch_flag): handle = nvjitlink.create(gpu_arch_flag) filename, data = undefined_extern_cubin - input_type = InputType.CUBIN.value + input_type = nvjitlink.InputType.CUBIN nvjitlink.add_data(handle, input_type, data, filename) with pytest.raises(RuntimeError): nvjitlink.complete(handle) @@ -84,7 +82,7 @@ def test_get_error_log(undefined_extern_cubin, gpu_arch_flag): def test_get_info_log(device_functions_cubin, gpu_arch_flag): handle = nvjitlink.create(gpu_arch_flag) filename, data = device_functions_cubin - input_type = InputType.CUBIN.value + input_type = nvjitlink.InputType.CUBIN nvjitlink.add_data(handle, input_type, data, filename) nvjitlink.complete(handle) info_log = nvjitlink.get_info_log(handle) @@ -96,7 +94,7 @@ def test_get_info_log(device_functions_cubin, gpu_arch_flag): def test_get_linked_cubin(device_functions_cubin, gpu_arch_flag): handle = nvjitlink.create(gpu_arch_flag) filename, data = device_functions_cubin - input_type = InputType.CUBIN.value + input_type = nvjitlink.InputType.CUBIN nvjitlink.add_data(handle, input_type, data, filename) nvjitlink.complete(handle) cubin = nvjitlink.get_linked_cubin(handle) @@ -111,7 +109,7 @@ def test_get_linked_cubin_link_not_complete_error( ): handle = nvjitlink.create(gpu_arch_flag) filename, data = device_functions_cubin - input_type = InputType.CUBIN.value + input_type = nvjitlink.InputType.CUBIN nvjitlink.add_data(handle, input_type, data, filename) with pytest.raises(RuntimeError, match="NVJITLINK_ERROR_INTERNAL error"): nvjitlink.get_linked_cubin(handle) @@ -124,7 +122,7 @@ def test_get_linked_cubin_from_lto(device_functions_ltoir_object, gpu_arch_flag) # containing an LTOIR container, because that is what NVCC produces when # LTO is requested. So we need to use the OBJECT input type, and the linker # retrieves the LTO IR from it because we passed the -lto flag. 
- input_type = InputType.OBJECT.value + input_type = nvjitlink.InputType.OBJECT handle = nvjitlink.create(gpu_arch_flag, "-lto") nvjitlink.add_data(handle, input_type, data, filename) nvjitlink.complete(handle) @@ -141,7 +139,7 @@ def test_get_linked_ptx_from_lto(device_functions_ltoir_object, gpu_arch_flag): # containing an LTOIR container, because that is what NVCC produces when # LTO is requested. So we need to use the OBJECT input type, and the linker # retrieves the LTO IR from it because we passed the -lto flag. - input_type = InputType.OBJECT.value + input_type = nvjitlink.InputType.OBJECT handle = nvjitlink.create(gpu_arch_flag, "-lto", "-ptx") nvjitlink.add_data(handle, input_type, data, filename) nvjitlink.complete(handle) @@ -154,7 +152,7 @@ def test_get_linked_ptx_link_not_complete_error( ): handle = nvjitlink.create(gpu_arch_flag, "-lto", "-ptx") filename, data = device_functions_ltoir_object - input_type = InputType.OBJECT.value + input_type = nvjitlink.InputType.OBJECT nvjitlink.add_data(handle, input_type, data, filename) with pytest.raises(RuntimeError, match="NVJITLINK_ERROR_INTERNAL error"): nvjitlink.get_linked_ptx(handle) @@ -162,5 +160,6 @@ def test_get_linked_ptx_link_not_complete_error( def test_package_version(): - assert pynvjitlink.__version__ is not None - assert len(str(pynvjitlink.__version__)) > 0 \ No newline at end of file + ver = nvjitlink.version() + assert len(ver) == 2 + assert ver >= (12, 0) From 7fde00efc6a2b71a9859bb28326ba6cc938254b2 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 20 Oct 2024 23:21:15 +0000 Subject: [PATCH 21/34] clean up a bit --- cuda_bindings/cuda/bindings/cynvjitlink.pxd | 9 --------- 1 file changed, 9 deletions(-) diff --git a/cuda_bindings/cuda/bindings/cynvjitlink.pxd b/cuda_bindings/cuda/bindings/cynvjitlink.pxd index 6c2194736..3c22d939e 100644 --- a/cuda_bindings/cuda/bindings/cynvjitlink.pxd +++ b/cuda_bindings/cuda/bindings/cynvjitlink.pxd @@ -37,15 +37,6 @@ ctypedef enum nvJitLinkInputType 
"nvJitLinkInputType": # types -cdef extern from *: - """ - #include - #include - #include - """ - ctypedef void* cudaStream_t 'cudaStream_t' - - ctypedef void* nvJitLinkHandle 'nvJitLinkHandle' From 22984359527374e8f384079a43ba6290ae9f996b Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Mon, 21 Oct 2024 00:48:37 +0000 Subject: [PATCH 22/34] add destroy docstring --- cuda_bindings/cuda/bindings/nvjitlink.pyx | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/cuda_bindings/cuda/bindings/nvjitlink.pyx b/cuda_bindings/cuda/bindings/nvjitlink.pyx index b75596f6c..6cc67d7e8 100644 --- a/cuda_bindings/cuda/bindings/nvjitlink.pyx +++ b/cuda_bindings/cuda/bindings/nvjitlink.pyx @@ -72,6 +72,13 @@ cdef int check_status(int status) except 1 nogil: ############################################################################### cpdef destroy(intptr_t handle): + """nvJitLinkDestroy frees the memory associated with the given handle. + + Args: + handle (intptr_t): nvJitLink handle. + + .. seealso:: `nvJitLinkDestroy` + """ cdef Handle h = handle with nogil: status = nvJitLinkDestroy(&h) From cb6c5b4068fc5c18ca9f949cce6414a9c1bf7496 Mon Sep 17 00:00:00 2001 From: ksimpson Date: Mon, 21 Oct 2024 17:42:01 -0700 Subject: [PATCH 23/34] update tests; regen bindings --- cuda_bindings/tests/test_nvjitlink.py | 183 ++++++++++---------------- 1 file changed, 67 insertions(+), 116 deletions(-) diff --git a/cuda_bindings/tests/test_nvjitlink.py b/cuda_bindings/tests/test_nvjitlink.py index ec475b04a..c92a100e7 100644 --- a/cuda_bindings/tests/test_nvjitlink.py +++ b/cuda_bindings/tests/test_nvjitlink.py @@ -3,14 +3,43 @@ # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE import pytest - +import os from cuda.bindings import nvjitlink +ptx_code = """ +.version 8.5 +.target sm_90 +.address_size 64 -def test_invalid_arch_error(): - # sm_XX is not a valid architecture - with pytest.raises(nvjitlink.nvJitLinkError, match="ERROR_UNRECOGNIZED_OPTION"): - nvjitlink.create(1, 
["-arch=sm_XX"]) +.visible .entry _Z6kernelPi( + .param .u64 _Z6kernelPi_param_0 +) +{ + .reg .pred %p<2>; + .reg .b32 %r<3>; + .reg .b64 %rd<3>; + + ld.param.u64 %rd1, [_Z6kernelPi_param_0]; + cvta.to.global.u64 %rd2, %rd1; + mov.u32 %r1, %tid.x; + st.global.u32 [%rd2+0], %r1; + ret; +} +""" + +minimal_kernel = """ +.version 6.4 +.target sm_75 +.address_size 64 + +.visible .entry _kernel() { + ret; +} +""" + +# Convert PTX code to bytes +ptx_bytes = ptx_code.encode('utf-8') +minimal_kernel_bytes = minimal_kernel.encode('utf-8') def test_unrecognized_option_error(): @@ -18,144 +47,66 @@ def test_unrecognized_option_error(): nvjitlink.create(1, ["-fictitious_option"]) -def test_invalid_option_type_error(): - with pytest.raises(TypeError, match="Expecting only strings"): - nvjitlink.create("-arch", 53) +def test_invalid_arch_error(): + # sm_XX is not a valid architecture + with pytest.raises(nvjitlink.nvJitLinkError, match="ERROR_UNRECOGNIZED_OPTION"): + nvjitlink.create(1, ["-arch=sm_XX"]) def test_create_and_destroy(): - handle = nvjitlink.create("-arch=sm_53") + handle = nvjitlink.create(1, ["-arch=sm_53"]) assert handle != 0 nvjitlink.destroy(handle) def test_complete_empty(): - handle = nvjitlink.create("-arch=sm_75") + handle = nvjitlink.create(1, ["-arch=sm_90"]) nvjitlink.complete(handle) nvjitlink.destroy(handle) +def test_add_data(): + handle = nvjitlink.create(1, ["-arch=sm_90"]) + data = ptx_bytes + nvjitlink.add_data(handle, nvjitlink.InputType.ANY, data, len(data), "test_data") -@pytest.mark.parametrize( - "input_file,input_type", - [ - ("device_functions_cubin", nvjitlink.InputType.CUBIN), - ("device_functions_fatbin", nvjitlink.InputType.FATBIN), - ("device_functions_ptx", nvjitlink.InputType.PTX), - ("device_functions_object", nvjitlink.InputType.OBJECT), - ("device_functions_archive", nvjitlink.InputType.LIBRARY), - ], -) -def test_add_file(input_file, input_type, gpu_arch_flag, request): - filename, data = request.getfixturevalue(input_file) - - 
handle = nvjitlink.create(gpu_arch_flag) - nvjitlink.add_data(handle, input_type, data, filename) - nvjitlink.destroy(handle) - - -# We test the LTO input case separately as it requires the `-lto` flag. The -# OBJECT input type is used because the LTO-IR container is packaged in an ELF -# object when produced by NVCC. -def test_add_file_lto(device_functions_ltoir_object, gpu_arch_flag): - filename, data = device_functions_ltoir_object - handle = nvjitlink.create(gpu_arch_flag, "-lto") - nvjitlink.add_data(handle, nvjitlink.InputType.OBJECT, data, filename) - nvjitlink.destroy(handle) +def test_add_file(): + handle = nvjitlink.create(1, ["-arch=sm_90"]) + file_path = "test_file.cubin" + with open (file_path, "wb") as f: + f.write(ptx_bytes) - -def test_get_error_log(undefined_extern_cubin, gpu_arch_flag): - handle = nvjitlink.create(gpu_arch_flag) - filename, data = undefined_extern_cubin - input_type = nvjitlink.InputType.CUBIN - nvjitlink.add_data(handle, input_type, data, filename) - with pytest.raises(RuntimeError): - nvjitlink.complete(handle) - error_log = nvjitlink.get_error_log(handle) - nvjitlink.destroy(handle) - assert ( - "Undefined reference to '_Z5undefff' " - "in 'undefined_extern.cubin'" in error_log - ) - - -def test_get_info_log(device_functions_cubin, gpu_arch_flag): - handle = nvjitlink.create(gpu_arch_flag) - filename, data = device_functions_cubin - input_type = nvjitlink.InputType.CUBIN - nvjitlink.add_data(handle, input_type, data, filename) + nvjitlink.add_file(handle, nvjitlink.InputType.ANY, str(file_path)) nvjitlink.complete(handle) - info_log = nvjitlink.get_info_log(handle) nvjitlink.destroy(handle) - # Info log is empty - assert "" == info_log + + os.remove(file_path) -def test_get_linked_cubin(device_functions_cubin, gpu_arch_flag): - handle = nvjitlink.create(gpu_arch_flag) - filename, data = device_functions_cubin - input_type = nvjitlink.InputType.CUBIN - nvjitlink.add_data(handle, input_type, data, filename) +def 
test_get_error_log(): + handle = nvjitlink.create(1, ["-arch=sm_90"]) nvjitlink.complete(handle) - cubin = nvjitlink.get_linked_cubin(handle) + log_size = nvjitlink.get_error_log_size(handle) + log = nvjitlink.get_error_log(handle) + assert len(log) == log_size nvjitlink.destroy(handle) - # Just check we got something that looks like an ELF - assert cubin[:4] == b"\x7fELF" - - -def test_get_linked_cubin_link_not_complete_error( - device_functions_cubin, gpu_arch_flag -): - handle = nvjitlink.create(gpu_arch_flag) - filename, data = device_functions_cubin - input_type = nvjitlink.InputType.CUBIN - nvjitlink.add_data(handle, input_type, data, filename) - with pytest.raises(RuntimeError, match="NVJITLINK_ERROR_INTERNAL error"): - nvjitlink.get_linked_cubin(handle) - nvjitlink.destroy(handle) - -def test_get_linked_cubin_from_lto(device_functions_ltoir_object, gpu_arch_flag): - filename, data = device_functions_ltoir_object - # device_functions_ltoir_object is a host object containing a fatbin - # containing an LTOIR container, because that is what NVCC produces when - # LTO is requested. So we need to use the OBJECT input type, and the linker - # retrieves the LTO IR from it because we passed the -lto flag. 
- input_type = nvjitlink.InputType.OBJECT - handle = nvjitlink.create(gpu_arch_flag, "-lto") - nvjitlink.add_data(handle, input_type, data, filename) +def test_get_info_log(): + handle = nvjitlink.create(1, ["-arch=sm_90"]) nvjitlink.complete(handle) - cubin = nvjitlink.get_linked_cubin(handle) + log_size = nvjitlink.get_info_log_size(handle) + log = nvjitlink.get_info_log(handle) + assert len(log) == log_size nvjitlink.destroy(handle) - # Just check we got something that looks like an ELF - assert cubin[:4] == b"\x7fELF" - -def test_get_linked_ptx_from_lto(device_functions_ltoir_object, gpu_arch_flag): - filename, data = device_functions_ltoir_object - # device_functions_ltoir_object is a host object containing a fatbin - # containing an LTOIR container, because that is what NVCC produces when - # LTO is requested. So we need to use the OBJECT input type, and the linker - # retrieves the LTO IR from it because we passed the -lto flag. - input_type = nvjitlink.InputType.OBJECT - handle = nvjitlink.create(gpu_arch_flag, "-lto", "-ptx") - nvjitlink.add_data(handle, input_type, data, filename) +def test_get_linked_cubin(): + handle = nvjitlink.create(1, ["-arch=sm_90"]) nvjitlink.complete(handle) - nvjitlink.get_linked_ptx(handle) - nvjitlink.destroy(handle) - - -def test_get_linked_ptx_link_not_complete_error( - device_functions_ltoir_object, gpu_arch_flag -): - handle = nvjitlink.create(gpu_arch_flag, "-lto", "-ptx") - filename, data = device_functions_ltoir_object - input_type = nvjitlink.InputType.OBJECT - nvjitlink.add_data(handle, input_type, data, filename) - with pytest.raises(RuntimeError, match="NVJITLINK_ERROR_INTERNAL error"): - nvjitlink.get_linked_ptx(handle) + cubin_size = nvjitlink.get_linked_cubin_size(handle) + cubin = nvjitlink.get_linked_cubin(handle) + assert len(cubin) == cubin_size nvjitlink.destroy(handle) From e941305c9eddded4ce296d1125c13310cffdbdd8 Mon Sep 17 00:00:00 2001 From: ksimpson Date: Mon, 21 Oct 2024 18:44:41 -0700 Subject: [PATCH 
24/34] update test file TODO add some coverage for GetLinkedPtx, and expected errors once bindings are checked in --- cuda_bindings/tests/test_nvjitlink.py | 83 +++++++++++++++------------ 1 file changed, 47 insertions(+), 36 deletions(-) diff --git a/cuda_bindings/tests/test_nvjitlink.py b/cuda_bindings/tests/test_nvjitlink.py index c92a100e7..605b98bfe 100644 --- a/cuda_bindings/tests/test_nvjitlink.py +++ b/cuda_bindings/tests/test_nvjitlink.py @@ -1,10 +1,16 @@ -# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# Copyright 2021-2024 NVIDIA Corporation. All rights reserved. # -# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# Please refer to the NVIDIA end user license agreement (EULA) associated +# with this source code for terms and conditions that govern your use of +# this software. Any use, reproduction, disclosure, or distribution of +# this software and related documentation outside the terms of the EULA +# is strictly prohibited. import pytest import os -from cuda.bindings import nvjitlink +import cuda.bindings + + ptx_code = """ .version 8.5 @@ -41,76 +47,81 @@ ptx_bytes = ptx_code.encode('utf-8') minimal_kernel_bytes = minimal_kernel.encode('utf-8') - def test_unrecognized_option_error(): - with pytest.raises(nvjitlink.nvJitLinkError, match="ERROR_UNRECOGNIZED_OPTION"): - nvjitlink.create(1, ["-fictitious_option"]) + with pytest.raises(cuda.bindings.nvjitlink.nvJitLinkError, match="ERROR_UNRECOGNIZED_OPTION"): + cuda.bindings.nvjitlink.create(1, ["-fictitious_option"]) def test_invalid_arch_error(): # sm_XX is not a valid architecture - with pytest.raises(nvjitlink.nvJitLinkError, match="ERROR_UNRECOGNIZED_OPTION"): - nvjitlink.create(1, ["-arch=sm_XX"]) + with pytest.raises(cuda.bindings.nvjitlink.nvJitLinkError, match="ERROR_UNRECOGNIZED_OPTION"): + cuda.bindings.nvjitlink.create(1, ["-arch=sm_XX"]) def test_create_and_destroy(): - handle = nvjitlink.create(1, ["-arch=sm_53"]) + handle = 
cuda.bindings.nvjitlink.create(1, ["-arch=sm_53"]) assert handle != 0 - nvjitlink.destroy(handle) + cuda.bindings.nvjitlink.destroy(handle) def test_complete_empty(): - handle = nvjitlink.create(1, ["-arch=sm_90"]) - nvjitlink.complete(handle) - nvjitlink.destroy(handle) + handle = cuda.bindings.nvjitlink.create(1, ["-arch=sm_90"]) + cuda.bindings.nvjitlink.complete(handle) + cuda.bindings.nvjitlink.destroy(handle) def test_add_data(): - handle = nvjitlink.create(1, ["-arch=sm_90"]) + handle = cuda.bindings.nvjitlink.create(1, ["-arch=sm_90"]) data = ptx_bytes - nvjitlink.add_data(handle, nvjitlink.InputType.ANY, data, len(data), "test_data") + cuda.bindings.nvjitlink.add_data(handle, cuda.bindings.nvjitlink.InputType.ANY, data, len(data), "test_data") + cuda.bindings.nvjitlink.complete(handle) + cuda.bindings.nvjitlink.destroy(handle) def test_add_file(): - handle = nvjitlink.create(1, ["-arch=sm_90"]) + handle = cuda.bindings.nvjitlink.create(1, ["-arch=sm_90"]) file_path = "test_file.cubin" with open (file_path, "wb") as f: f.write(ptx_bytes) - nvjitlink.add_file(handle, nvjitlink.InputType.ANY, str(file_path)) - nvjitlink.complete(handle) - nvjitlink.destroy(handle) + cuda.bindings.nvjitlink.add_file(handle, cuda.bindings.nvjitlink.InputType.ANY, str(file_path)) + cuda.bindings.nvjitlink.complete(handle) + cuda.bindings.nvjitlink.destroy(handle) os.remove(file_path) def test_get_error_log(): - handle = nvjitlink.create(1, ["-arch=sm_90"]) - nvjitlink.complete(handle) - log_size = nvjitlink.get_error_log_size(handle) - log = nvjitlink.get_error_log(handle) + handle = cuda.bindings.nvjitlink.create(1, ["-arch=sm_90"]) + cuda.bindings.nvjitlink.complete(handle) + log_size = cuda.bindings.nvjitlink.get_error_log_size(handle) + log = bytearray(log_size) + cuda.bindings.nvjitlink.get_error_log(handle, log) assert len(log) == log_size - nvjitlink.destroy(handle) + cuda.bindings.nvjitlink.destroy(handle) def test_get_info_log(): - handle = nvjitlink.create(1, 
["-arch=sm_90"]) - nvjitlink.complete(handle) - log_size = nvjitlink.get_info_log_size(handle) - log = nvjitlink.get_info_log(handle) + handle = cuda.bindings.nvjitlink.create(1, ["-arch=sm_90"]) + cuda.bindings.nvjitlink.complete(handle) + log_size = cuda.bindings.nvjitlink.get_info_log_size(handle) + log = bytearray(log_size) + cuda.bindings.nvjitlink.get_info_log(handle, log) assert len(log) == log_size - nvjitlink.destroy(handle) + cuda.bindings.nvjitlink.destroy(handle) def test_get_linked_cubin(): - handle = nvjitlink.create(1, ["-arch=sm_90"]) - nvjitlink.complete(handle) - cubin_size = nvjitlink.get_linked_cubin_size(handle) - cubin = nvjitlink.get_linked_cubin(handle) + handle = cuda.bindings.nvjitlink.create(1, ["-arch=sm_90"]) + cuda.bindings.nvjitlink.complete(handle) + cubin_size = cuda.bindings.nvjitlink.get_linked_cubin_size(handle) + cubin = bytearray(cubin_size) + cuda.bindings.nvjitlink.get_linked_cubin(handle, cubin) assert len(cubin) == cubin_size - nvjitlink.destroy(handle) + cuda.bindings.nvjitlink.destroy(handle) +#TODO add a ptx test def test_package_version(): - ver = nvjitlink.version() + ver = cuda.bindings.nvjitlink.version() assert len(ver) == 2 - assert ver >= (12, 0) + assert ver >= (12, 0) \ No newline at end of file From df605e99626adb63960ca33b0ddb64f0bce8d026 Mon Sep 17 00:00:00 2001 From: ksimpson Date: Tue, 22 Oct 2024 09:34:51 -0700 Subject: [PATCH 25/34] update test file --- cuda_bindings/tests/test_nvjitlink.py | 104 ++++++++++++++------------ 1 file changed, 55 insertions(+), 49 deletions(-) diff --git a/cuda_bindings/tests/test_nvjitlink.py b/cuda_bindings/tests/test_nvjitlink.py index 605b98bfe..6524c4a88 100644 --- a/cuda_bindings/tests/test_nvjitlink.py +++ b/cuda_bindings/tests/test_nvjitlink.py @@ -8,11 +8,10 @@ import pytest import os -import cuda.bindings +from cuda.bindings import nvjitlink - -ptx_code = """ +ptx_kernel = """ .version 8.5 .target sm_90 .address_size 64 @@ -33,95 +32,102 @@ } """ -minimal_kernel = 
""" -.version 6.4 -.target sm_75 +minimal_ptx_kernel = """ +.version 8.5 +.target sm_90 .address_size 64 -.visible .entry _kernel() { +.func _MinimalKernel() +{ ret; } """ -# Convert PTX code to bytes -ptx_bytes = ptx_code.encode('utf-8') -minimal_kernel_bytes = minimal_kernel.encode('utf-8') +ptx_kernel_bytes = ptx_kernel.encode('utf-8') +minimal_ptx_kernel_bytes = minimal_ptx_kernel.encode('utf-8') def test_unrecognized_option_error(): - with pytest.raises(cuda.bindings.nvjitlink.nvJitLinkError, match="ERROR_UNRECOGNIZED_OPTION"): - cuda.bindings.nvjitlink.create(1, ["-fictitious_option"]) + with pytest.raises(nvjitlink.nvJitLinkError, match="ERROR_UNRECOGNIZED_OPTION"): + nvjitlink.create(1, ["-fictitious_option"]) def test_invalid_arch_error(): - # sm_XX is not a valid architecture - with pytest.raises(cuda.bindings.nvjitlink.nvJitLinkError, match="ERROR_UNRECOGNIZED_OPTION"): - cuda.bindings.nvjitlink.create(1, ["-arch=sm_XX"]) + with pytest.raises(nvjitlink.nvJitLinkError, match="ERROR_UNRECOGNIZED_OPTION"): + nvjitlink.create(1, ["-arch=sm_XX"]) def test_create_and_destroy(): - handle = cuda.bindings.nvjitlink.create(1, ["-arch=sm_53"]) + handle = nvjitlink.create(1, ["-arch=sm_53"]) assert handle != 0 - cuda.bindings.nvjitlink.destroy(handle) + nvjitlink.destroy(handle) def test_complete_empty(): - handle = cuda.bindings.nvjitlink.create(1, ["-arch=sm_90"]) - cuda.bindings.nvjitlink.complete(handle) - cuda.bindings.nvjitlink.destroy(handle) + handle = nvjitlink.create(1, ["-arch=sm_90"]) + nvjitlink.complete(handle) + nvjitlink.destroy(handle) + def test_add_data(): - handle = cuda.bindings.nvjitlink.create(1, ["-arch=sm_90"]) - data = ptx_bytes - cuda.bindings.nvjitlink.add_data(handle, cuda.bindings.nvjitlink.InputType.ANY, data, len(data), "test_data") - cuda.bindings.nvjitlink.complete(handle) - cuda.bindings.nvjitlink.destroy(handle) + handle = nvjitlink.create(1, ["-arch=sm_90"]) + nvjitlink.add_data(handle, nvjitlink.InputType.ANY, ptx_kernel_bytes, 
len(ptx_kernel_bytes), "test_data") + nvjitlink.add_data(handle, nvjitlink.InputType.ANY, minimal_ptx_kernel_bytes, len(minimal_ptx_kernel_bytes), "minimal_test_data") + nvjitlink.complete(handle) + nvjitlink.destroy(handle) def test_add_file(): - handle = cuda.bindings.nvjitlink.create(1, ["-arch=sm_90"]) + handle = nvjitlink.create(1, ["-arch=sm_90"]) file_path = "test_file.cubin" with open (file_path, "wb") as f: - f.write(ptx_bytes) + f.write(ptx_kernel_bytes) - cuda.bindings.nvjitlink.add_file(handle, cuda.bindings.nvjitlink.InputType.ANY, str(file_path)) - cuda.bindings.nvjitlink.complete(handle) - cuda.bindings.nvjitlink.destroy(handle) - + nvjitlink.add_file(handle, nvjitlink.InputType.ANY, str(file_path)) + nvjitlink.complete(handle) + nvjitlink.destroy(handle) os.remove(file_path) def test_get_error_log(): - handle = cuda.bindings.nvjitlink.create(1, ["-arch=sm_90"]) - cuda.bindings.nvjitlink.complete(handle) - log_size = cuda.bindings.nvjitlink.get_error_log_size(handle) + handle = nvjitlink.create(1, ["-arch=sm_90"]) + nvjitlink.complete(handle) + log_size = nvjitlink.get_error_log_size(handle) log = bytearray(log_size) - cuda.bindings.nvjitlink.get_error_log(handle, log) + nvjitlink.get_error_log(handle, log) assert len(log) == log_size - cuda.bindings.nvjitlink.destroy(handle) + nvjitlink.destroy(handle) def test_get_info_log(): - handle = cuda.bindings.nvjitlink.create(1, ["-arch=sm_90"]) - cuda.bindings.nvjitlink.complete(handle) - log_size = cuda.bindings.nvjitlink.get_info_log_size(handle) + handle = nvjitlink.create(1, ["-arch=sm_90"]) + nvjitlink.add_data(handle, nvjitlink.InputType.ANY, ptx_kernel_bytes, len(ptx_kernel_bytes), "test_data") + nvjitlink.complete(handle) + log_size = nvjitlink.get_info_log_size(handle) log = bytearray(log_size) - cuda.bindings.nvjitlink.get_info_log(handle, log) + nvjitlink.get_info_log(handle, log) assert len(log) == log_size - cuda.bindings.nvjitlink.destroy(handle) + nvjitlink.destroy(handle) def 
test_get_linked_cubin(): - handle = cuda.bindings.nvjitlink.create(1, ["-arch=sm_90"]) - cuda.bindings.nvjitlink.complete(handle) - cubin_size = cuda.bindings.nvjitlink.get_linked_cubin_size(handle) + handle = nvjitlink.create(1, ["-arch=sm_90"]) + nvjitlink.add_data(handle, nvjitlink.InputType.ANY, ptx_kernel_bytes, len(ptx_kernel_bytes), "test_data") + nvjitlink.complete(handle) + cubin_size = nvjitlink.get_linked_cubin_size(handle) cubin = bytearray(cubin_size) - cuda.bindings.nvjitlink.get_linked_cubin(handle, cubin) + nvjitlink.get_linked_cubin(handle, cubin) assert len(cubin) == cubin_size - cuda.bindings.nvjitlink.destroy(handle) + nvjitlink.destroy(handle) + + +def test_get_linked_ptx(): + # TODO improve this test to call get_linked_ptx without this error + handle = nvjitlink.create(2, ["-arch=sm_90", "-lto"]) + with pytest.raises(nvjitlink.nvJitLinkError, match="ERROR_NVVM_COMPILE"): + nvjitlink.complete(handle) -#TODO add a ptx test def test_package_version(): - ver = cuda.bindings.nvjitlink.version() + ver = nvjitlink.version() assert len(ver) == 2 - assert ver >= (12, 0) \ No newline at end of file + assert ver >= (12, 0) From b1536f3cebcadc44ec99d8a5a9f49ae9ed1a239e Mon Sep 17 00:00:00 2001 From: ksimpson Date: Tue, 22 Oct 2024 10:10:51 -0700 Subject: [PATCH 26/34] regenerate bindings --- .../bindings/_internal/nvjitlink_linux.pyx | 2 +- .../bindings/_internal/nvjitlink_windows.pyx | 2 +- .../cuda/bindings/_internal/utils.pxd | 4 + .../cuda/bindings/_internal/utils.pyx | 8 +- cuda_bindings/cuda/bindings/nvjitlink.pxd | 8 +- cuda_bindings/cuda/bindings/nvjitlink.pyx | 146 ++---------------- 6 files changed, 29 insertions(+), 141 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx index 146832f0e..eb882b4fb 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx @@ -1,6 +1,6 @@ # 
Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. # -# SPDX-License-Identifier: Apache-2.0 +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # # This code was automatically generated across versions from 12.0.1 to 12.6.2. Do not modify it directly. diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx index a6a378f86..e50de88af 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx @@ -1,6 +1,6 @@ # Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. # -# SPDX-License-Identifier: Apache-2.0 +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # # This code was automatically generated across versions from 12.0.1 to 12.6.2. Do not modify it directly. diff --git a/cuda_bindings/cuda/bindings/_internal/utils.pxd b/cuda_bindings/cuda/bindings/_internal/utils.pxd index 2b45ced3b..d629179dc 100644 --- a/cuda_bindings/cuda/bindings/_internal/utils.pxd +++ b/cuda_bindings/cuda/bindings/_internal/utils.pxd @@ -1,3 +1,7 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + from libc.stdint cimport int32_t, int64_t, intptr_t from libcpp.vector cimport vector from libcpp cimport bool as cppbool diff --git a/cuda_bindings/cuda/bindings/_internal/utils.pyx b/cuda_bindings/cuda/bindings/_internal/utils.pyx index 9c5626155..55945ec96 100644 --- a/cuda_bindings/cuda/bindings/_internal/utils.pyx +++ b/cuda_bindings/cuda/bindings/_internal/utils.pyx @@ -1,3 +1,7 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. 
+# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + cimport cpython from libc.stdint cimport intptr_t from libcpp.utility cimport move @@ -29,7 +33,9 @@ cdef void* get_buffer_pointer(buf, Py_ssize_t size, readonly=True) except*: else: # try buffer protocol try: status = cpython.PyObject_GetBuffer(buf, &view, flags) - assert view.len == size + # when the caller does not provide a size, it is set to -1 at generate-time by cybind + if size != -1: + assert view.len == size assert view.ndim == 1 except Exception as e: adj = "writable " if not readonly else "" diff --git a/cuda_bindings/cuda/bindings/nvjitlink.pxd b/cuda_bindings/cuda/bindings/nvjitlink.pxd index 2b8841cd5..59b56d2a3 100644 --- a/cuda_bindings/cuda/bindings/nvjitlink.pxd +++ b/cuda_bindings/cuda/bindings/nvjitlink.pxd @@ -33,11 +33,11 @@ cpdef add_data(intptr_t handle, int input_type, intptr_t data, size_t size, name cpdef add_file(intptr_t handle, int input_type, file_name) cpdef complete(intptr_t handle) cpdef size_t get_linked_cubin_size(intptr_t handle) except? 0 -cpdef get_linked_cubin(intptr_t handle, intptr_t cubin) +cpdef get_linked_cubin(intptr_t handle, cubin) cpdef size_t get_linked_ptx_size(intptr_t handle) except? 0 -cpdef get_linked_ptx(intptr_t handle, intptr_t ptx) +cpdef get_linked_ptx(intptr_t handle, ptx) cpdef size_t get_error_log_size(intptr_t handle) except? 0 -cpdef get_error_log(intptr_t handle, intptr_t log) +cpdef get_error_log(intptr_t handle, log) cpdef size_t get_info_log_size(intptr_t handle) except? 
0 -cpdef get_info_log(intptr_t handle, intptr_t log) +cpdef get_info_log(intptr_t handle, log) cpdef tuple version() diff --git a/cuda_bindings/cuda/bindings/nvjitlink.pyx b/cuda_bindings/cuda/bindings/nvjitlink.pyx index 6cc67d7e8..8237ea14c 100644 --- a/cuda_bindings/cuda/bindings/nvjitlink.pyx +++ b/cuda_bindings/cuda/bindings/nvjitlink.pyx @@ -86,23 +86,6 @@ cpdef destroy(intptr_t handle): cpdef intptr_t create(uint32_t num_options, options) except -1: - """nvJitLinkCreate creates an instance of nvJitLinkHandle with the given input options, and sets the output parameter ``handle``. - - Args: - num_options (uint32_t): Number of options passed. - options (object): Array of size ``num_options`` of option strings. It can be: - - - an :class:`int` as the pointer address to the nested sequence, or - - a Python sequence of :class:`int`\s, each of which is a pointer address - to a valid sequence of 'char', or - - a nested Python sequence of ``str``. - - - Returns: - intptr_t: Address of nvJitLink handle. - - .. seealso:: `nvJitLinkCreate` - """ cdef nested_resource[ char ] _options_ get_nested_resource_ptr[char](_options_, options, NULL) cdef Handle handle @@ -113,17 +96,6 @@ cpdef intptr_t create(uint32_t num_options, options) except -1: cpdef add_data(intptr_t handle, int input_type, intptr_t data, size_t size, name): - """nvJitLinkAddData adds data image to the link. - - Args: - handle (intptr_t): nvJitLink handle. - input_type (InputType): kind of input. - data (intptr_t): pointer to data image in memory. - size (size_t): size of the data. - name (str): name of input object. - - .. seealso:: `nvJitLinkAddData` - """ if not isinstance(name, str): raise TypeError("name must be a Python str") cdef bytes _temp_name_ = (name).encode() @@ -134,15 +106,6 @@ cpdef add_data(intptr_t handle, int input_type, intptr_t data, size_t size, name cpdef add_file(intptr_t handle, int input_type, file_name): - """nvJitLinkAddFile reads data from file and links it in. 
- - Args: - handle (intptr_t): nvJitLink handle. - input_type (InputType): kind of input. - file_name (str): name of file. - - .. seealso:: `nvJitLinkAddFile` - """ if not isinstance(file_name, str): raise TypeError("file_name must be a Python str") cdef bytes _temp_file_name_ = (file_name).encode() @@ -153,29 +116,12 @@ cpdef add_file(intptr_t handle, int input_type, file_name): cpdef complete(intptr_t handle): - """nvJitLinkComplete does the actual link. - - Args: - handle (intptr_t): nvJitLink handle. - - .. seealso:: `nvJitLinkComplete` - """ with nogil: status = nvJitLinkComplete(handle) check_status(status) cpdef size_t get_linked_cubin_size(intptr_t handle) except? 0: - """nvJitLinkGetLinkedCubinSize gets the size of the linked cubin. - - Args: - handle (intptr_t): nvJitLink handle. - - Returns: - size_t: Size of the linked cubin. - - .. seealso:: `nvJitLinkGetLinkedCubinSize` - """ cdef size_t size with nogil: status = nvJitLinkGetLinkedCubinSize(handle, &size) @@ -183,31 +129,14 @@ cpdef size_t get_linked_cubin_size(intptr_t handle) except? 0: return size -cpdef get_linked_cubin(intptr_t handle, intptr_t cubin): - """nvJitLinkGetLinkedCubin gets the linked cubin. - - Args: - handle (intptr_t): nvJitLink handle. - cubin (intptr_t): The linked cubin. - - .. seealso:: `nvJitLinkGetLinkedCubin` - """ +cpdef get_linked_cubin(intptr_t handle, cubin): + cdef void* _cubin_ = get_buffer_pointer(cubin, -1, readonly=False) with nogil: - status = nvJitLinkGetLinkedCubin(handle, cubin) + status = nvJitLinkGetLinkedCubin(handle, _cubin_) check_status(status) cpdef size_t get_linked_ptx_size(intptr_t handle) except? 0: - """nvJitLinkGetLinkedPtxSize gets the size of the linked ptx. - - Args: - handle (intptr_t): nvJitLink handle. - - Returns: - size_t: Size of the linked PTX. - - .. 
seealso:: `nvJitLinkGetLinkedPtxSize` - """ cdef size_t size with nogil: status = nvJitLinkGetLinkedPtxSize(handle, &size) @@ -215,31 +144,14 @@ cpdef size_t get_linked_ptx_size(intptr_t handle) except? 0: return size -cpdef get_linked_ptx(intptr_t handle, intptr_t ptx): - """nvJitLinkGetLinkedPtx gets the linked ptx. - - Args: - handle (intptr_t): nvJitLink handle. - ptx (intptr_t): The linked PTX. - - .. seealso:: `nvJitLinkGetLinkedPtx` - """ +cpdef get_linked_ptx(intptr_t handle, ptx): + cdef void* _ptx_ = get_buffer_pointer(ptx, -1, readonly=False) with nogil: - status = nvJitLinkGetLinkedPtx(handle, ptx) + status = nvJitLinkGetLinkedPtx(handle, _ptx_) check_status(status) cpdef size_t get_error_log_size(intptr_t handle) except? 0: - """nvJitLinkGetErrorLogSize gets the size of the error log. - - Args: - handle (intptr_t): nvJitLink handle. - - Returns: - size_t: Size of the error log. - - .. seealso:: `nvJitLinkGetErrorLogSize` - """ cdef size_t size with nogil: status = nvJitLinkGetErrorLogSize(handle, &size) @@ -247,31 +159,14 @@ cpdef size_t get_error_log_size(intptr_t handle) except? 0: return size -cpdef get_error_log(intptr_t handle, intptr_t log): - """nvJitLinkGetErrorLog puts any error messages in the log. - - Args: - handle (intptr_t): nvJitLink handle. - log (intptr_t): The error log. - - .. seealso:: `nvJitLinkGetErrorLog` - """ +cpdef get_error_log(intptr_t handle, log): + cdef void* _log_ = get_buffer_pointer(log, -1, readonly=False) with nogil: - status = nvJitLinkGetErrorLog(handle, log) + status = nvJitLinkGetErrorLog(handle, _log_) check_status(status) cpdef size_t get_info_log_size(intptr_t handle) except? 0: - """nvJitLinkGetInfoLogSize gets the size of the info log. - - Args: - handle (intptr_t): nvJitLink handle. - - Returns: - size_t: Size of the info log. - - .. 
seealso:: `nvJitLinkGetInfoLogSize` - """ cdef size_t size with nogil: status = nvJitLinkGetInfoLogSize(handle, &size) @@ -279,31 +174,14 @@ cpdef size_t get_info_log_size(intptr_t handle) except? 0: return size -cpdef get_info_log(intptr_t handle, intptr_t log): - """nvJitLinkGetInfoLog puts any info messages in the log. - - Args: - handle (intptr_t): nvJitLink handle. - log (intptr_t): The info log. - - .. seealso:: `nvJitLinkGetInfoLog` - """ +cpdef get_info_log(intptr_t handle, log): + cdef void* _log_ = get_buffer_pointer(log, -1, readonly=False) with nogil: - status = nvJitLinkGetInfoLog(handle, log) + status = nvJitLinkGetInfoLog(handle, _log_) check_status(status) cpdef tuple version(): - """nvJitLinkVersion returns the current version of nvJitLink. - - Returns: - A 2-tuple containing: - - - unsigned int: The major version. - - unsigned int: The minor version. - - .. seealso:: `nvJitLinkVersion` - """ cdef unsigned int major cdef unsigned int minor with nogil: From 992ddcf6d543af37c1798bd18f32f8687451f4dc Mon Sep 17 00:00:00 2001 From: ksimpson Date: Tue, 22 Oct 2024 10:26:38 -0700 Subject: [PATCH 27/34] regenerate bindings --- cuda_bindings/cuda/bindings/nvjitlink.pxd | 2 +- cuda_bindings/cuda/bindings/nvjitlink.pyx | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/cuda_bindings/cuda/bindings/nvjitlink.pxd b/cuda_bindings/cuda/bindings/nvjitlink.pxd index 59b56d2a3..4f701ed4d 100644 --- a/cuda_bindings/cuda/bindings/nvjitlink.pxd +++ b/cuda_bindings/cuda/bindings/nvjitlink.pxd @@ -29,7 +29,7 @@ ctypedef nvJitLinkInputType _InputType ############################################################################### cpdef intptr_t create(uint32_t num_options, options) except -1 -cpdef add_data(intptr_t handle, int input_type, intptr_t data, size_t size, name) +cpdef add_data(intptr_t handle, int input_type, data, size_t size, name) cpdef add_file(intptr_t handle, int input_type, file_name) cpdef complete(intptr_t handle) cpdef size_t 
get_linked_cubin_size(intptr_t handle) except? 0 diff --git a/cuda_bindings/cuda/bindings/nvjitlink.pyx b/cuda_bindings/cuda/bindings/nvjitlink.pyx index 8237ea14c..7ffb16d9a 100644 --- a/cuda_bindings/cuda/bindings/nvjitlink.pyx +++ b/cuda_bindings/cuda/bindings/nvjitlink.pyx @@ -95,13 +95,14 @@ cpdef intptr_t create(uint32_t num_options, options) except -1: return handle -cpdef add_data(intptr_t handle, int input_type, intptr_t data, size_t size, name): +cpdef add_data(intptr_t handle, int input_type, data, size_t size, name): + cdef void* _data_ = get_buffer_pointer(data, size, readonly=True) if not isinstance(name, str): raise TypeError("name must be a Python str") cdef bytes _temp_name_ = (name).encode() cdef char* _name_ = _temp_name_ with nogil: - status = nvJitLinkAddData(handle, <_InputType>input_type, data, size, _name_) + status = nvJitLinkAddData(handle, <_InputType>input_type, _data_, size, _name_) check_status(status) From b5c5c1c5b769fa60cb34980f68db939dc08cd90a Mon Sep 17 00:00:00 2001 From: ksimpson Date: Tue, 22 Oct 2024 10:54:18 -0700 Subject: [PATCH 28/34] regenerate with docstrings --- cuda_bindings/cuda/bindings/nvjitlink.pyx | 126 ++++++++++++++++++++++ 1 file changed, 126 insertions(+) diff --git a/cuda_bindings/cuda/bindings/nvjitlink.pyx b/cuda_bindings/cuda/bindings/nvjitlink.pyx index 7ffb16d9a..01a12528e 100644 --- a/cuda_bindings/cuda/bindings/nvjitlink.pyx +++ b/cuda_bindings/cuda/bindings/nvjitlink.pyx @@ -86,6 +86,23 @@ cpdef destroy(intptr_t handle): cpdef intptr_t create(uint32_t num_options, options) except -1: + """nvJitLinkCreate creates an instance of nvJitLinkHandle with the given input options, and sets the output parameter ``handle``. + + Args: + num_options (uint32_t): Number of options passed. + options (object): Array of size ``num_options`` of option strings. 
It can be: + + - an :class:`int` as the pointer address to the nested sequence, or + - a Python sequence of :class:`int`\s, each of which is a pointer address + to a valid sequence of 'char', or + - a nested Python sequence of ``str``. + + + Returns: + intptr_t: Address of nvJitLink handle. + + .. seealso:: `nvJitLinkCreate` + """ cdef nested_resource[ char ] _options_ get_nested_resource_ptr[char](_options_, options, NULL) cdef Handle handle @@ -96,6 +113,17 @@ cpdef intptr_t create(uint32_t num_options, options) except -1: cpdef add_data(intptr_t handle, int input_type, data, size_t size, name): + """nvJitLinkAddData adds data image to the link. + + Args: + handle (intptr_t): nvJitLink handle. + input_type (InputType): kind of input. + data (bytes): pointer to data image in memory. + size (size_t): size of the data. + name (str): name of input object. + + .. seealso:: `nvJitLinkAddData` + """ cdef void* _data_ = get_buffer_pointer(data, size, readonly=True) if not isinstance(name, str): raise TypeError("name must be a Python str") @@ -107,6 +135,15 @@ cpdef add_data(intptr_t handle, int input_type, data, size_t size, name): cpdef add_file(intptr_t handle, int input_type, file_name): + """nvJitLinkAddFile reads data from file and links it in. + + Args: + handle (intptr_t): nvJitLink handle. + input_type (InputType): kind of input. + file_name (str): name of file. + + .. seealso:: `nvJitLinkAddFile` + """ if not isinstance(file_name, str): raise TypeError("file_name must be a Python str") cdef bytes _temp_file_name_ = (file_name).encode() @@ -117,12 +154,29 @@ cpdef add_file(intptr_t handle, int input_type, file_name): cpdef complete(intptr_t handle): + """nvJitLinkComplete does the actual link. + + Args: + handle (intptr_t): nvJitLink handle. + + .. seealso:: `nvJitLinkComplete` + """ with nogil: status = nvJitLinkComplete(handle) check_status(status) cpdef size_t get_linked_cubin_size(intptr_t handle) except? 
0: + """nvJitLinkGetLinkedCubinSize gets the size of the linked cubin. + + Args: + handle (intptr_t): nvJitLink handle. + + Returns: + size_t: Size of the linked cubin. + + .. seealso:: `nvJitLinkGetLinkedCubinSize` + """ cdef size_t size with nogil: status = nvJitLinkGetLinkedCubinSize(handle, &size) @@ -131,6 +185,14 @@ cpdef size_t get_linked_cubin_size(intptr_t handle) except? 0: cpdef get_linked_cubin(intptr_t handle, cubin): + """nvJitLinkGetLinkedCubin gets the linked cubin. + + Args: + handle (intptr_t): nvJitLink handle. + cubin (bytes): The linked cubin. + + .. seealso:: `nvJitLinkGetLinkedCubin` + """ cdef void* _cubin_ = get_buffer_pointer(cubin, -1, readonly=False) with nogil: status = nvJitLinkGetLinkedCubin(handle, _cubin_) @@ -138,6 +200,16 @@ cpdef get_linked_cubin(intptr_t handle, cubin): cpdef size_t get_linked_ptx_size(intptr_t handle) except? 0: + """nvJitLinkGetLinkedPtxSize gets the size of the linked ptx. + + Args: + handle (intptr_t): nvJitLink handle. + + Returns: + size_t: Size of the linked PTX. + + .. seealso:: `nvJitLinkGetLinkedPtxSize` + """ cdef size_t size with nogil: status = nvJitLinkGetLinkedPtxSize(handle, &size) @@ -146,6 +218,14 @@ cpdef size_t get_linked_ptx_size(intptr_t handle) except? 0: cpdef get_linked_ptx(intptr_t handle, ptx): + """nvJitLinkGetLinkedPtx gets the linked ptx. + + Args: + handle (intptr_t): nvJitLink handle. + ptx (bytes): The linked PTX. + + .. seealso:: `nvJitLinkGetLinkedPtx` + """ cdef void* _ptx_ = get_buffer_pointer(ptx, -1, readonly=False) with nogil: status = nvJitLinkGetLinkedPtx(handle, _ptx_) @@ -153,6 +233,16 @@ cpdef get_linked_ptx(intptr_t handle, ptx): cpdef size_t get_error_log_size(intptr_t handle) except? 0: + """nvJitLinkGetErrorLogSize gets the size of the error log. + + Args: + handle (intptr_t): nvJitLink handle. + + Returns: + size_t: Size of the error log. + + .. 
seealso:: `nvJitLinkGetErrorLogSize` + """ cdef size_t size with nogil: status = nvJitLinkGetErrorLogSize(handle, &size) @@ -161,6 +251,14 @@ cpdef size_t get_error_log_size(intptr_t handle) except? 0: cpdef get_error_log(intptr_t handle, log): + """nvJitLinkGetErrorLog puts any error messages in the log. + + Args: + handle (intptr_t): nvJitLink handle. + log (bytes): The error log. + + .. seealso:: `nvJitLinkGetErrorLog` + """ cdef void* _log_ = get_buffer_pointer(log, -1, readonly=False) with nogil: status = nvJitLinkGetErrorLog(handle, _log_) @@ -168,6 +266,16 @@ cpdef get_error_log(intptr_t handle, log): cpdef size_t get_info_log_size(intptr_t handle) except? 0: + """nvJitLinkGetInfoLogSize gets the size of the info log. + + Args: + handle (intptr_t): nvJitLink handle. + + Returns: + size_t: Size of the info log. + + .. seealso:: `nvJitLinkGetInfoLogSize` + """ cdef size_t size with nogil: status = nvJitLinkGetInfoLogSize(handle, &size) @@ -176,6 +284,14 @@ cpdef size_t get_info_log_size(intptr_t handle) except? 0: cpdef get_info_log(intptr_t handle, log): + """nvJitLinkGetInfoLog puts any info messages in the log. + + Args: + handle (intptr_t): nvJitLink handle. + log (bytes): The info log. + + .. seealso:: `nvJitLinkGetInfoLog` + """ cdef void* _log_ = get_buffer_pointer(log, -1, readonly=False) with nogil: status = nvJitLinkGetInfoLog(handle, _log_) @@ -183,6 +299,16 @@ cpdef get_info_log(intptr_t handle, log): cpdef tuple version(): + """nvJitLinkVersion returns the current version of nvJitLink. + + Returns: + A 2-tuple containing: + + - unsigned int: The major version. + - unsigned int: The minor version. + + .. 
seealso:: `nvJitLinkVersion` + """ cdef unsigned int major cdef unsigned int minor with nogil: From 8ee6aa2c16561312de13dfe91e7b6cfd259a1f0e Mon Sep 17 00:00:00 2001 From: ksimpson Date: Tue, 22 Oct 2024 11:17:06 -0700 Subject: [PATCH 29/34] regenerate bindings with windows imports --- cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx | 1 + 1 file changed, 1 insertion(+) diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx index e50de88af..586296ab4 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx @@ -8,6 +8,7 @@ from libc.stdint cimport intptr_t from .utils cimport get_nvjitlink_dso_version_suffix +from .utils import FunctionNotFoundError, NotSupportedError import os import site From f276cd67256f126ed239f5d64cc64ec549cafa47 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Wed, 23 Oct 2024 01:20:15 +0000 Subject: [PATCH 30/34] use tmp_path fixture --- cuda_bindings/tests/test_nvjitlink.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/cuda_bindings/tests/test_nvjitlink.py b/cuda_bindings/tests/test_nvjitlink.py index 6524c4a88..a08377989 100644 --- a/cuda_bindings/tests/test_nvjitlink.py +++ b/cuda_bindings/tests/test_nvjitlink.py @@ -7,7 +7,7 @@ # is strictly prohibited. 
import pytest -import os + from cuda.bindings import nvjitlink @@ -76,16 +76,13 @@ def test_add_data(): nvjitlink.destroy(handle) -def test_add_file(): +def test_add_file(tmp_path): handle = nvjitlink.create(1, ["-arch=sm_90"]) - file_path = "test_file.cubin" - with open (file_path, "wb") as f: - f.write(ptx_kernel_bytes) - + file_path = tmp_path / "test_file.cubin" + file_path.write_bytes(ptx_kernel_bytes) nvjitlink.add_file(handle, nvjitlink.InputType.ANY, str(file_path)) nvjitlink.complete(handle) nvjitlink.destroy(handle) - os.remove(file_path) def test_get_error_log(): From fcdae67288d82a25aa73e4e7046c1e75eafb16b2 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Tue, 22 Oct 2024 21:24:07 -0400 Subject: [PATCH 31/34] fix license header in the test file --- cuda_bindings/tests/test_nvjitlink.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/cuda_bindings/tests/test_nvjitlink.py b/cuda_bindings/tests/test_nvjitlink.py index a08377989..182d2bc40 100644 --- a/cuda_bindings/tests/test_nvjitlink.py +++ b/cuda_bindings/tests/test_nvjitlink.py @@ -1,10 +1,6 @@ -# Copyright 2021-2024 NVIDIA Corporation. All rights reserved. +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. # -# Please refer to the NVIDIA end user license agreement (EULA) associated -# with this source code for terms and conditions that govern your use of -# this software. Any use, reproduction, disclosure, or distribution of -# this software and related documentation outside the terms of the EULA -# is strictly prohibited. 
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE import pytest From f861f80c23245b997204476b7de8ac6201693e86 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Tue, 22 Oct 2024 18:54:42 -0700 Subject: [PATCH 32/34] fix nvjitlink dll name --- cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx index 586296ab4..b8ab705d8 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx @@ -52,7 +52,7 @@ cdef load_library(const int driver_ver): for suffix in get_nvjitlink_dso_version_suffix(driver_ver): if len(suffix) == 0: continue - dll_name = f"nvJitLink64_{suffix}.dll" + dll_name = f"nvJitLink_{suffix}0_0.dll" # First check if the DLL has been loaded by 3rd parties try: From 375c33b30a4253bebe6cee2048f691029797cf55 Mon Sep 17 00:00:00 2001 From: ksimpson Date: Fri, 25 Oct 2024 09:12:06 -0700 Subject: [PATCH 33/34] fix typo in setup.py --- cuda_bindings/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_bindings/setup.py b/cuda_bindings/setup.py index 9d0f6fad3..316559859 100644 --- a/cuda_bindings/setup.py +++ b/cuda_bindings/setup.py @@ -272,7 +272,7 @@ def do_cythonize(extensions): # tests ["tests/*.pyx"], - # interal files used by cybind. 
We on + # interal files used by cybind generated bindings ['cuda/bindings/_internal/nvjitlink.pyx'], ['cuda/bindings/_internal/utils.pyx'], ] From f7704a93d7fe606fe6d9ff15a7ae9d8d0dcc4271 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Fri, 25 Oct 2024 12:19:15 -0400 Subject: [PATCH 34/34] Update cuda_bindings/setup.py --- cuda_bindings/setup.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cuda_bindings/setup.py b/cuda_bindings/setup.py index 316559859..ca1f82648 100644 --- a/cuda_bindings/setup.py +++ b/cuda_bindings/setup.py @@ -271,8 +271,7 @@ def do_cythonize(extensions): ["cuda/*.pyx"], # tests ["tests/*.pyx"], - - # interal files used by cybind generated bindings + # internal files used by generated bindings ['cuda/bindings/_internal/nvjitlink.pyx'], ['cuda/bindings/_internal/utils.pyx'], ]