From 6d2b1b3504d8bb14927a4b6cf747481f72fa2aa8 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Thu, 13 Mar 2025 20:27:39 -0700 Subject: [PATCH 1/9] Capture PRs since 2025-03-06 --- cuda_core/docs/source/release/0.2.0-notes.rst | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/cuda_core/docs/source/release/0.2.0-notes.rst b/cuda_core/docs/source/release/0.2.0-notes.rst index 02f586d58..36d7fdd3c 100644 --- a/cuda_core/docs/source/release/0.2.0-notes.rst +++ b/cuda_core/docs/source/release/0.2.0-notes.rst @@ -27,9 +27,20 @@ New features - Expose :class:`ObjectCode` as a public API, which allows loading cubins from memory or disk. For loading other kinds of code types, please continue using :class:`Program`. - A C++ helper function ``get_cuda_native_handle()`` is provided in the new ``include/utility.cuh`` header to retrive the underlying CUDA C objects (ex: ``CUstream``) from a Python object returned by the ``.handle`` attribute (ex: :attr:`Stream.handle`). - For objects such as :class:`Program` and :class:`Linker` that could dispatch to different backends, a new ``.backend`` attribute is provided to query this information. -- Support CUDA event timing. +- Support CUDA event timing. (#481, #498, #508) - An :class:`~_event.Event` may now be created without recording it to a :class:`~_stream.Stream` using the :meth:`Device.create_event` method. 
+Minor fixes and enhancements +---------------------------- +- Fix a dangling pointer problem in _linker.py (#516) +- Add ``@functools.lru_cache`` decorator for ``get_binding_version()`` (#512) +- Change selected ``.decode()`` to ``.decode("utf-8", errors="backslashreplace")`` (#510) +- Add :class:`Event` to public API (#501) + +Test fixes +---------- +- Clean up device initialization in test (#507) + Limitations ----------- From 0c6a283b85e9dc3cdbbcf493b3507cb60d4792f8 Mon Sep 17 00:00:00 2001 From: Vladislav Zhurba Date: Thu, 13 Mar 2025 12:31:47 -0700 Subject: [PATCH 2/9] Check for required headers early Also fix conditional EGL API inclusions. They should always be included because all of their types are already redefined and available. --- .../bindings/_lib/cyruntime/cyruntime.pxd.in | 6 +- .../bindings/_lib/cyruntime/cyruntime.pyx.in | 11 ++++ cuda_bindings/setup.py | 62 ++++++++++++------- 3 files changed, 55 insertions(+), 24 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_lib/cyruntime/cyruntime.pxd.in b/cuda_bindings/cuda/bindings/_lib/cyruntime/cyruntime.pxd.in index 743dac01a..c760f0220 100644 --- a/cuda_bindings/cuda/bindings/_lib/cyruntime/cyruntime.pxd.in +++ b/cuda_bindings/cuda/bindings/_lib/cyruntime/cyruntime.pxd.in @@ -108,9 +108,9 @@ from libcpp cimport bool {{if 'cudaCreateSurfaceObject' in found_functions}}cdef cudaError_t _cudaCreateSurfaceObject(cudaSurfaceObject_t* pSurfObject, const cudaResourceDesc* pResDesc) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}} {{if 'cudaGetTextureObjectResourceDesc' in found_functions}}cdef cudaError_t _cudaGetTextureObjectResourceDesc(cudaResourceDesc* pResDesc, cudaTextureObject_t texObject) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}} {{if 'cudaGraphicsEGLRegisterImage' in found_functions}}cdef cudaError_t _cudaGraphicsEGLRegisterImage(cudaGraphicsResource_t* pCudaResource, EGLImageKHR image, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}} -{{if 
'cudaEGLStreamProducerPresentFrame' in found_functions}}cdef cudaError_t _cudaEGLStreamProducerPresentFrame(cudaEglStreamConnection* conn, cudaEglFrame eglframe, cudaStream_t* pStream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}} -{{if 'cudaEGLStreamProducerReturnFrame' in found_functions}}cdef cudaError_t _cudaEGLStreamProducerReturnFrame(cudaEglStreamConnection* conn, cudaEglFrame* eglframe, cudaStream_t* pStream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}} -{{if 'cudaGraphicsResourceGetMappedEglFrame' in found_functions}}cdef cudaError_t _cudaGraphicsResourceGetMappedEglFrame(cudaEglFrame* eglFrame, cudaGraphicsResource_t resource, unsigned int index, unsigned int mipLevel) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}} +{{if True}}cdef cudaError_t _cudaEGLStreamProducerPresentFrame(cudaEglStreamConnection* conn, cudaEglFrame eglframe, cudaStream_t* pStream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}} +{{if True}}cdef cudaError_t _cudaEGLStreamProducerReturnFrame(cudaEglStreamConnection* conn, cudaEglFrame* eglframe, cudaStream_t* pStream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}} +{{if True}}cdef cudaError_t _cudaGraphicsResourceGetMappedEglFrame(cudaEglFrame* eglFrame, cudaGraphicsResource_t resource, unsigned int index, unsigned int mipLevel) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}} {{if True}}cdef cudaError_t _cudaVDPAUSetVDPAUDevice(int device, VdpDevice vdpDevice, VdpGetProcAddress* vdpGetProcAddress) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}} {{if 'cudaArrayGetMemoryRequirements' in found_functions}}cdef cudaError_t _cudaArrayGetMemoryRequirements(cudaArrayMemoryRequirements* memoryRequirements, cudaArray_t array, int device) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}} {{if 'cudaMipmappedArrayGetMemoryRequirements' in found_functions}}cdef cudaError_t _cudaMipmappedArrayGetMemoryRequirements(cudaArrayMemoryRequirements* memoryRequirements, 
cudaMipmappedArray_t mipmap, int device) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}} diff --git a/cuda_bindings/cuda/bindings/_lib/cyruntime/cyruntime.pyx.in b/cuda_bindings/cuda/bindings/_lib/cyruntime/cyruntime.pyx.in index 3210f173c..b66f0c0c8 100644 --- a/cuda_bindings/cuda/bindings/_lib/cyruntime/cyruntime.pyx.in +++ b/cuda_bindings/cuda/bindings/_lib/cyruntime/cyruntime.pyx.in @@ -2206,6 +2206,7 @@ cdef cudaError_t _cudaGetTextureObjectResourceDesc(cudaResourceDesc* pResDesc, c return err {{endif}} +{{if True}} cdef cudaError_t _cudaEGLStreamProducerPresentFrame(cudaEglStreamConnection* conn, cudaEglFrame eglframe, cudaStream_t* pStream) except ?cudaErrorCallRequiresNewerDriver nogil: cdef cudaError_t err = cudaSuccess @@ -2222,6 +2223,9 @@ cdef cudaError_t _cudaEGLStreamProducerPresentFrame(cudaEglStreamConnection* con _setLastError(err) return err +{{endif}} +{{if True}} + cdef cudaError_t _cudaEGLStreamProducerReturnFrame(cudaEglStreamConnection* conn, cudaEglFrame* eglframe, cudaStream_t* pStream) except ?cudaErrorCallRequiresNewerDriver nogil: cdef cudaError_t err = cudaSuccess err = m_global.lazyInitContextState() @@ -2242,6 +2246,9 @@ cdef cudaError_t _cudaEGLStreamProducerReturnFrame(cudaEglStreamConnection* conn return err return err +{{endif}} +{{if True}} + cdef cudaError_t _cudaGraphicsResourceGetMappedEglFrame(cudaEglFrame* eglFrame, cudaGraphicsResource_t resource, unsigned int index, unsigned int mipLevel) except ?cudaErrorCallRequiresNewerDriver nogil: cdef cudaError_t err = cudaSuccess err = m_global.lazyInitContextState() @@ -2259,9 +2266,13 @@ cdef cudaError_t _cudaGraphicsResourceGetMappedEglFrame(cudaEglFrame* eglFrame, return err return err +{{endif}} +{{if True}} + cdef cudaError_t _cudaVDPAUSetVDPAUDevice(int device, VdpDevice vdpDevice, VdpGetProcAddress* vdpGetProcAddress) except ?cudaErrorCallRequiresNewerDriver nogil: return cudaErrorNotSupported +{{endif}} {{if 'cudaArrayGetMemoryRequirements' in found_functions}} cdef 
cudaError_t _cudaArrayGetMemoryRequirements(cudaArrayMemoryRequirements* memoryRequirements, cudaArray_t array, int device) except ?cudaErrorCallRequiresNewerDriver nogil: diff --git a/cuda_bindings/setup.py b/cuda_bindings/setup.py index 887f30ac2..b280781af 100644 --- a/cuda_bindings/setup.py +++ b/cuda_bindings/setup.py @@ -48,8 +48,11 @@ # ---------------------------------------------------------------------- # Parse user-provided CUDA headers -header_dict = { - "driver": ["cuda.h", "cudaProfiler.h", "cudaEGL.h", "cudaGL.h", "cudaVDPAU.h"], +required_headers = { + "driver": [ + "cuda.h", + "cudaProfiler.h", + ], "runtime": [ "driver_types.h", "vector_types.h", @@ -61,13 +64,44 @@ "device_types.h", "driver_functions.h", "cuda_profiler_api.h", - "cuda_egl_interop.h", - "cuda_gl_interop.h", - "cuda_vdpau_interop.h", ], - "nvrtc": ["nvrtc.h"], + "nvrtc": [ + "nvrtc.h", + ], + # During compilation, Cython will reference C headers that are not + # explicitly parsed above. The following headers are known dependencies: + # + # - crt/host_defines.h + # - builtin_types.h + # - cuda_device_runtime_api.h + # + # These dependencies are specified through the headers above. } +# Assert that all headers exist +header_dict = {} +missing_headers = [] +include_path_list = [os.path.join(path, "include") for path in CUDA_HOME] + +for library, header_list in required_headers.items(): + header_paths = [] + for header in header_list: + path_candidate = [os.path.join(path, header) for path in include_path_list] + for path in path_candidate: + if os.path.exists(path): + header_paths += [path] + break + if not os.path.exists(path): + missing_headers += [header] + + # Update dictionary with validated paths to headers + header_dict[library] = header_paths + +if missing_headers: + error_message = "Couldn't find required headers: " + error_message += ", ".join([header for header in missing_headers]) + raise RuntimeError(f"{error_message}\nIs CUDA_HOME setup correctly? 
(CUDA_HOME=\"{CUDA_HOME}\")") + replace = { " __device_builtin__ ": " ", "CUDARTAPI ": " ", @@ -117,19 +151,8 @@ def __repr__(self): return f"{self._name}: {self._member_names} with types {self._member_types}" -include_path_list = [os.path.join(path, "include") for path in CUDA_HOME] print(f'Parsing headers in "{include_path_list}" (Caching = {PARSER_CACHING})') -for library, header_list in header_dict.items(): - header_paths = [] - for header in header_list: - path_candidate = [os.path.join(path, header) for path in include_path_list] - for path in path_candidate: - if os.path.exists(path): - header_paths += [path] - break - if not os.path.exists(path): - print(f"Missing header {header}") - +for library, header_paths in header_dict.items(): print(f"Parsing {library} headers") parser = CParser( header_paths, cache="./cache_{}".format(library.split(".")[0]) if PARSER_CACHING else None, replace=replace @@ -161,9 +184,6 @@ def __repr__(self): if discovered: found_struct += discovered -if len(found_functions) == 0: - raise RuntimeError(f'Parser found no functions. Is CUDA_HOME setup correctly? (CUDA_HOME="{CUDA_HOME}")') - # ---------------------------------------------------------------------- # Generate From fe6dcedf3989f3c72ca6939f8322216bcaac7944 Mon Sep 17 00:00:00 2001 From: Vladislav Zhurba Date: Thu, 13 Mar 2025 12:39:34 -0700 Subject: [PATCH 3/9] Run pre-commit --- cuda_bindings/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_bindings/setup.py b/cuda_bindings/setup.py index b280781af..e8db7899d 100644 --- a/cuda_bindings/setup.py +++ b/cuda_bindings/setup.py @@ -100,7 +100,7 @@ if missing_headers: error_message = "Couldn't find required headers: " error_message += ", ".join([header for header in missing_headers]) - raise RuntimeError(f"{error_message}\nIs CUDA_HOME setup correctly? (CUDA_HOME=\"{CUDA_HOME}\")") + raise RuntimeError(f'{error_message}\nIs CUDA_HOME setup correctly? 
(CUDA_HOME="{CUDA_HOME}")') replace = { " __device_builtin__ ": " ", From cd42fbbafcfda3a499798487f7c86b5e44d2b047 Mon Sep 17 00:00:00 2001 From: Vladislav Zhurba Date: Thu, 13 Mar 2025 12:41:56 -0700 Subject: [PATCH 4/9] Wording --- cuda_bindings/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_bindings/setup.py b/cuda_bindings/setup.py index e8db7899d..5945599a2 100644 --- a/cuda_bindings/setup.py +++ b/cuda_bindings/setup.py @@ -75,7 +75,7 @@ # - builtin_types.h # - cuda_device_runtime_api.h # - # These dependencies are specified through the headers above. + # These are the dependencies of the headers we parse. } # Assert that all headers exist From 525980f562b6719242aabf271bb29e2a65be5e29 Mon Sep 17 00:00:00 2001 From: Vladislav Zhurba Date: Thu, 13 Mar 2025 12:43:34 -0700 Subject: [PATCH 5/9] Different wording --- cuda_bindings/setup.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cuda_bindings/setup.py b/cuda_bindings/setup.py index 5945599a2..a7ffafc5e 100644 --- a/cuda_bindings/setup.py +++ b/cuda_bindings/setup.py @@ -69,13 +69,11 @@ "nvrtc.h", ], # During compilation, Cython will reference C headers that are not - # explicitly parsed above. The following headers are known dependencies: + # explicitly parsed above. These are the known dependencies: # # - crt/host_defines.h # - builtin_types.h # - cuda_device_runtime_api.h - # - # These are the dependencies of the headers we parse. 
} # Assert that all headers exist From c7c7892a7af37ea63beb5e4329b14119cf294f21 Mon Sep 17 00:00:00 2001 From: Vladislav Zhurba Date: Thu, 13 Mar 2025 15:25:26 -0700 Subject: [PATCH 6/9] Update release notes --- cuda_bindings/docs/source/release/12.X.Y-notes.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/cuda_bindings/docs/source/release/12.X.Y-notes.rst b/cuda_bindings/docs/source/release/12.X.Y-notes.rst index 708bb77e3..6536d3aea 100644 --- a/cuda_bindings/docs/source/release/12.X.Y-notes.rst +++ b/cuda_bindings/docs/source/release/12.X.Y-notes.rst @@ -9,3 +9,4 @@ Highlights * The ``cuda.bindings.nvvm`` Python module was added, wrapping the `libNVVM C API `_. +* Source build error checking added for missing required headers From 0a99e5a115bcc2075a2d555a9a8a2c8d72bf8804 Mon Sep 17 00:00:00 2001 From: Vladislav Zhurba Date: Thu, 13 Mar 2025 16:12:27 -0700 Subject: [PATCH 7/9] Apply review --- cuda_bindings/setup.py | 153 ++++++++++++++++++++++------------------- 1 file changed, 81 insertions(+), 72 deletions(-) diff --git a/cuda_bindings/setup.py b/cuda_bindings/setup.py index a7ffafc5e..3409766bf 100644 --- a/cuda_bindings/setup.py +++ b/cuda_bindings/setup.py @@ -76,49 +76,30 @@ # - cuda_device_runtime_api.h } -# Assert that all headers exist -header_dict = {} -missing_headers = [] -include_path_list = [os.path.join(path, "include") for path in CUDA_HOME] -for library, header_list in required_headers.items(): - header_paths = [] - for header in header_list: - path_candidate = [os.path.join(path, header) for path in include_path_list] - for path in path_candidate: - if os.path.exists(path): - header_paths += [path] - break - if not os.path.exists(path): - missing_headers += [header] - - # Update dictionary with validated paths to headers - header_dict[library] = header_paths - -if missing_headers: - error_message = "Couldn't find required headers: " - error_message += ", ".join([header for header in missing_headers]) - raise 
RuntimeError(f'{error_message}\nIs CUDA_HOME setup correctly? (CUDA_HOME="{CUDA_HOME}")') - -replace = { - " __device_builtin__ ": " ", - "CUDARTAPI ": " ", - "typedef __device_builtin__ enum cudaError cudaError_t;": "typedef cudaError cudaError_t;", - "typedef __device_builtin__ enum cudaOutputMode cudaOutputMode_t;": "typedef cudaOutputMode cudaOutputMode_t;", - "typedef enum cudaError cudaError_t;": "typedef cudaError cudaError_t;", - "typedef enum cudaOutputMode cudaOutputMode_t;": "typedef cudaOutputMode cudaOutputMode_t;", - "typedef enum cudaDataType_t cudaDataType_t;": "", - "typedef enum libraryPropertyType_t libraryPropertyType_t;": "", - " enum ": " ", - ", enum ": ", ", - "\\(enum ": "(", -} +def fetch_header_paths(required_headers, include_path_list): + header_dict = {} + missing_headers = [] + for library, header_list in required_headers.items(): + header_paths = [] + for header in header_list: + path_candidate = [os.path.join(path, header) for path in include_path_list] + for path in path_candidate: + if os.path.exists(path): + header_paths += [path] + break + else: + missing_headers += [header] + + # Update dictionary with validated paths to headers + header_dict[library] = header_paths -found_types = [] -found_functions = [] -found_values = [] -found_struct = [] -struct_list = {} + if missing_headers: + error_message = "Couldn't find required headers: " + error_message += ", ".join([header for header in missing_headers]) + raise RuntimeError(f'{error_message}\nIs CUDA_HOME setup correctly? 
(CUDA_HOME="{CUDA_HOME}")') + + return header_dict class Struct: @@ -149,38 +130,66 @@ def __repr__(self): return f"{self._name}: {self._member_names} with types {self._member_types}" -print(f'Parsing headers in "{include_path_list}" (Caching = {PARSER_CACHING})') -for library, header_paths in header_dict.items(): - print(f"Parsing {library} headers") - parser = CParser( - header_paths, cache="./cache_{}".format(library.split(".")[0]) if PARSER_CACHING else None, replace=replace - ) +def parse_headers(header_dict): + found_types = [] + found_functions = [] + found_values = [] + found_struct = [] + struct_list = {} + + replace = { + " __device_builtin__ ": " ", + "CUDARTAPI ": " ", + "typedef __device_builtin__ enum cudaError cudaError_t;": "typedef cudaError cudaError_t;", + "typedef __device_builtin__ enum cudaOutputMode cudaOutputMode_t;": "typedef cudaOutputMode cudaOutputMode_t;", + "typedef enum cudaError cudaError_t;": "typedef cudaError cudaError_t;", + "typedef enum cudaOutputMode cudaOutputMode_t;": "typedef cudaOutputMode cudaOutputMode_t;", + "typedef enum cudaDataType_t cudaDataType_t;": "", + "typedef enum libraryPropertyType_t libraryPropertyType_t;": "", + " enum ": " ", + ", enum ": ", ", + "\\(enum ": "(", + } + + print(f'Parsing headers in "{include_path_list}" (Caching = {PARSER_CACHING})') + for library, header_paths in header_dict.items(): + print(f"Parsing {library} headers") + parser = CParser( + header_paths, cache="./cache_{}".format(library.split(".")[0]) if PARSER_CACHING else None, replace=replace + ) + + if library == "driver": + CUDA_VERSION = parser.defs["macros"].get("CUDA_VERSION", "Unknown") + print(f"Found CUDA_VERSION: {CUDA_VERSION}") - if library == "driver": - CUDA_VERSION = parser.defs["macros"].get("CUDA_VERSION", "Unknown") - print(f"Found CUDA_VERSION: {CUDA_VERSION}") - - # Combine types with others since they sometimes get tangled - found_types += {key for key in parser.defs["types"]} - found_types += {key for key in 
parser.defs["structs"]} - found_types += {key for key in parser.defs["unions"]} - found_types += {key for key in parser.defs["enums"]} - found_functions += {key for key in parser.defs["functions"]} - found_values += {key for key in parser.defs["values"]} - - for key, value in parser.defs["structs"].items(): - struct_list[key] = Struct(key, value["members"]) - for key, value in parser.defs["unions"].items(): - struct_list[key] = Struct(key, value["members"]) - - for key, value in struct_list.items(): - if key.startswith("anon_union") or key.startswith("anon_struct"): - continue - - found_struct += [key] - discovered = value.discoverMembers(struct_list, key) - if discovered: - found_struct += discovered + # Combine types with others since they sometimes get tangled + found_types += {key for key in parser.defs["types"]} + found_types += {key for key in parser.defs["structs"]} + found_types += {key for key in parser.defs["unions"]} + found_types += {key for key in parser.defs["enums"]} + found_functions += {key for key in parser.defs["functions"]} + found_values += {key for key in parser.defs["values"]} + + for key, value in parser.defs["structs"].items(): + struct_list[key] = Struct(key, value["members"]) + for key, value in parser.defs["unions"].items(): + struct_list[key] = Struct(key, value["members"]) + + for key, value in struct_list.items(): + if key.startswith("anon_union") or key.startswith("anon_struct"): + continue + + found_struct += [key] + discovered = value.discoverMembers(struct_list, key) + if discovered: + found_struct += discovered + + return found_types, found_functions, found_values, found_struct, struct_list + + +include_path_list = [os.path.join(path, "include") for path in CUDA_HOME] +header_dict = fetch_header_paths(required_headers, include_path_list) +found_types, found_functions, found_values, found_struct, struct_list = parse_headers(header_dict) # ---------------------------------------------------------------------- # Generate From 
dc5a4228d7002f2947d379d79b38c0af18aaadff Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Fri, 14 Mar 2025 16:03:16 -0700 Subject: [PATCH 8/9] Remove mention of :class:`Event` --- cuda_core/docs/source/release/0.2.0-notes.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/cuda_core/docs/source/release/0.2.0-notes.rst b/cuda_core/docs/source/release/0.2.0-notes.rst index 36d7fdd3c..35b1f1cc9 100644 --- a/cuda_core/docs/source/release/0.2.0-notes.rst +++ b/cuda_core/docs/source/release/0.2.0-notes.rst @@ -35,7 +35,6 @@ Minor fixes and enhancements - Fix a dangling pointer problem in _linker.py (#516) - Add ``@functools.lru_cache`` decorator for ``get_binding_version()`` (#512) - Change selected ``.decode()`` to ``.decode("utf-8", errors="backslashreplace")`` (#510) -- Add :class:`Event` to public API (#501) Test fixes ---------- From 5f31dd84a04f90ec437e525c341283594c90ea2b Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Fri, 14 Mar 2025 20:08:05 -0700 Subject: [PATCH 9/9] Add release notes for PRs identified by at-leofang: https://github.com/NVIDIA/cuda-python/pull/519#discussion_r1996271665 --- cuda_core/docs/source/release/0.2.0-notes.rst | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/cuda_core/docs/source/release/0.2.0-notes.rst b/cuda_core/docs/source/release/0.2.0-notes.rst index 35b1f1cc9..87353f476 100644 --- a/cuda_core/docs/source/release/0.2.0-notes.rst +++ b/cuda_core/docs/source/release/0.2.0-notes.rst @@ -27,18 +27,30 @@ New features - Expose :class:`ObjectCode` as a public API, which allows loading cubins from memory or disk. For loading other kinds of code types, please continue using :class:`Program`. - A C++ helper function ``get_cuda_native_handle()`` is provided in the new ``include/utility.cuh`` header to retrive the underlying CUDA C objects (ex: ``CUstream``) from a Python object returned by the ``.handle`` attribute (ex: :attr:`Stream.handle`). 
- For objects such as :class:`Program` and :class:`Linker` that could dispatch to different backends, a new ``.backend`` attribute is provided to query this information. -- Support CUDA event timing. (#481, #498, #508) -- An :class:`~_event.Event` may now be created without recording it to a :class:`~_stream.Stream` using the :meth:`Device.create_event` method. +- Support CUDA :class:`Event` timing. (#481, #498, #508) +- An :class:`Event` may now be created without recording it to a :class:`~_stream.Stream` using the :meth:`Device.create_event` method. +- :class:`Program` now supports the additional ``PTX`` code type. (#317) +- :meth:`Linker.link` exceptions now include the original error log. (#423) +- In a systematic sweep through the cuda.core implementations, many exception messages were made more consistent and informative. (#458) + +New examples +------------ +- ``jit_lto_fractal.py`` — Demonstrates just-in-time link-time optimization for fractal generation. (:class:`Device`, :class:`LaunchConfig`, :class:`Linker`, :class:`LinkerOptions`, :class:`Program`, :class:`ProgramOptions`) (#475) +- ``simple_multi_gpu_example.py`` — Example of using multiple GPUs. (:class:`Device`, :class:`Program`, :class:`LaunchConfig`) (#304) +- ``show_device_properties.py`` — Displays detailed device properties. (:class:`Device`) (#474) Minor fixes and enhancements ---------------------------- -- Fix a dangling pointer problem in _linker.py (#516) -- Add ``@functools.lru_cache`` decorator for ``get_binding_version()`` (#512) -- Change selected ``.decode()`` to ``.decode("utf-8", errors="backslashreplace")`` (#510) +- A dangling pointer problem in ``_linker.py`` was fixed. (#516) +- Add ``@functools.lru_cache`` decorator for :func:`get_binding_version`. (#512) +- Selected ``.decode()`` calls were changed to ``.decode("utf-8", errors="backslashreplace")`` to ensure that decoding error messages does not abort the process. 
(#510) +- The performance of :meth:`Device.compute_capability` was improved. (#459) +- The :class:`Program` constructor now issues a warning when falling back to :func:`cuLink`. (#315) +- To avoid deprecation warnings, the cuda.bindings imports in the cuda.core implementations were cleaned up. (#404) Test fixes ---------- -- Clean up device initialization in test (#507) +- Clean up device initialization in some tests. (#507) Limitations -----------