Merge branch 'main' into rparolin/cuda.core.hashable

rparolin · web-flow · commit b8cc00d145df · 2025-10-29T16:32:22.000-07:00
diff --git a/cuda_pathfinder/cuda/pathfinder/__init__.py b/cuda_pathfinder/cuda/pathfinder/__init__.py
@@ -13,7 +13,7 @@
 from cuda.pathfinder._headers.supported_nvidia_headers import SUPPORTED_HEADERS_CTK as _SUPPORTED_HEADERS_CTK
 from cuda.pathfinder._version import __version__ as __version__
 
-# Indirection to help Sphinx find the docstring.
+# Indirections to help Sphinx find the docstrings.
 #: Mapping from short CUDA Toolkit (CTK) library names to their canonical
 #: header basenames (used to validate a discovered include directory).
 #: Example: ``"cublas" → "cublas.h"``. The key set is platform-aware
diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/supported_nvidia_libs.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/supported_nvidia_libs.py
@@ -82,8 +82,11 @@
 }
 DIRECT_DEPENDENCIES = DIRECT_DEPENDENCIES_CTK | {
     "mathdx": ("nvrtc",),
+    "cublasmp": ("cublas", "cublasLt", "nvshmem_host"),
     "cufftMp": ("nvshmem_host",),
     "cudss": ("cublas", "cublasLt"),
+    "cutensor": ("cublasLt",),
+    "cutensorMg": ("cutensor", "cublasLt"),
 }
 
 # Based on these released files:
@@ -238,9 +241,11 @@
 }
 SUPPORTED_LINUX_SONAMES_OTHER = {
     "cublasmp": ("libcublasmp.so.0",),
-    "cufftMp": ("libcufftMp.so.11",),
+    "cufftMp": ("libcufftMp.so.12", "libcufftMp.so.11"),
     "mathdx": ("libmathdx.so.0",),
     "cudss": ("libcudss.so.0",),
+    "cutensor": ("libcutensor.so.2",),
+    "cutensorMg": ("libcutensorMg.so.2",),
     "nccl": ("libnccl.so.2",),
     "nvpl_fftw": ("libnvpl_fftw.so.0",),
     "nvshmem_host": ("libnvshmem_host.so.3",),
@@ -402,6 +407,8 @@
 SUPPORTED_WINDOWS_DLLS_OTHER = {
     "mathdx": ("mathdx64_0.dll",),
     "cudss": ("cudss64_0.dll",),
+    "cutensor": ("cutensor.dll",),
+    "cutensorMg": ("cutensorMg.dll",),
 }
 SUPPORTED_WINDOWS_DLLS = SUPPORTED_WINDOWS_DLLS_CTK | SUPPORTED_WINDOWS_DLLS_OTHER
 
@@ -446,7 +453,9 @@
 SITE_PACKAGES_LIBDIRS_LINUX_OTHER = {
     "cublasmp": ("nvidia/cublasmp/cu13/lib", "nvidia/cublasmp/cu12/lib"),
     "cudss": ("nvidia/cu13/lib", "nvidia/cu12/lib"),
-    "cufftMp": ("nvidia/cufftmp/cu12/lib",),
+    "cufftMp": ("nvidia/cufftmp/cu13/lib", "nvidia/cufftmp/cu12/lib"),
+    "cutensor": ("cutensor/lib",),
+    "cutensorMg": ("cutensor/lib",),
     "mathdx": ("nvidia/cu13/lib", "nvidia/cu12/lib"),
     "nccl": ("nvidia/nccl/lib",),
     "nvpl_fftw": ("nvpl/lib",),
@@ -484,7 +493,10 @@
     "nvvm": ("nvidia/cu13/bin/x86_64", "nvidia/cuda_nvcc/nvvm/bin"),
 }
 SITE_PACKAGES_LIBDIRS_WINDOWS_OTHER = {
+    "cudss": ("nvidia/cu13/bin", "nvidia/cu12/bin"),
     "mathdx": ("nvidia/cu13/bin/x86_64", "nvidia/cu12/bin"),
+    "cutensor": ("cutensor/bin",),
+    "cutensorMg": ("cutensor/bin",),
 }
 SITE_PACKAGES_LIBDIRS_WINDOWS = SITE_PACKAGES_LIBDIRS_WINDOWS_CTK | SITE_PACKAGES_LIBDIRS_WINDOWS_OTHER
 
diff --git a/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py b/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py
@@ -22,28 +22,12 @@ def _joined_isfile(dirpath: str, basename: str) -> bool:
     return os.path.isfile(os.path.join(dirpath, basename))
 
 
-def _find_nvshmem_header_directory() -> Optional[str]:
-    if IS_WINDOWS:
-        # nvshmem has no Windows support.
-        return None
-
+def _find_under_site_packages(sub_dir: str, h_basename: str) -> Optional[str]:
     # Installed from a wheel
-    nvidia_sub_dirs = ("nvidia", "nvshmem", "include")
     hdr_dir: str  # help mypy
-    for hdr_dir in find_sub_dirs_all_sitepackages(nvidia_sub_dirs):
-        if _joined_isfile(hdr_dir, "nvshmem.h"):
-            return hdr_dir
-
-    conda_prefix = os.environ.get("CONDA_PREFIX")
-    if conda_prefix and os.path.isdir(conda_prefix):
-        hdr_dir = os.path.join(conda_prefix, "include")
-        if _joined_isfile(hdr_dir, "nvshmem.h"):
-            return hdr_dir
-
-    for hdr_dir in sorted(glob.glob("/usr/include/nvshmem_*"), reverse=True):
-        if _joined_isfile(hdr_dir, "nvshmem.h"):
+    for hdr_dir in find_sub_dirs_all_sitepackages(tuple(sub_dir.split("/"))):
+        if _joined_isfile(hdr_dir, h_basename):
             return hdr_dir
-
     return None
 
 
@@ -54,6 +38,13 @@ def _find_based_on_ctk_layout(libname: str, h_basename: str, anchor_point: str)
     parts.append("include")
     idir = os.path.join(*parts)
     if libname == "cccl":
+        if IS_WINDOWS:
+            cdir_ctk12 = os.path.join(idir, "targets", "x64")  # conda has this anomaly
+            cdir_ctk13 = os.path.join(cdir_ctk12, "cccl")
+            if _joined_isfile(cdir_ctk13, h_basename):
+                return cdir_ctk13
+            if _joined_isfile(cdir_ctk12, h_basename):
+                return cdir_ctk12
         cdir = os.path.join(idir, "cccl")  # CTK 13
         if _joined_isfile(cdir, h_basename):
             return cdir
@@ -62,38 +53,40 @@ def _find_based_on_ctk_layout(libname: str, h_basename: str, anchor_point: str)
     return None
 
 
-def _find_based_on_conda_layout(libname: str, h_basename: str, conda_prefix: str) -> Optional[str]:
+def _find_based_on_conda_layout(libname: str, h_basename: str, ctk_layout: bool) -> Optional[str]:
+    conda_prefix = os.environ.get("CONDA_PREFIX")
+    if not conda_prefix:
+        return None
     if IS_WINDOWS:
         anchor_point = os.path.join(conda_prefix, "Library")
         if not os.path.isdir(anchor_point):
             return None
     else:
-        targets_include_path = glob.glob(os.path.join(conda_prefix, "targets", "*", "include"))
-        if not targets_include_path:
-            return None
-        if len(targets_include_path) != 1:
-            # Conda does not support multiple architectures.
-            # QUESTION(PR#956): Do we want to issue a warning?
-            return None
-        anchor_point = os.path.dirname(targets_include_path[0])
+        if ctk_layout:
+            targets_include_path = glob.glob(os.path.join(conda_prefix, "targets", "*", "include"))
+            if not targets_include_path:
+                return None
+            if len(targets_include_path) != 1:
+                # Conda does not support multiple architectures.
+                # QUESTION(PR#956): Do we want to issue a warning?
+                return None
+            include_path = targets_include_path[0]
+        else:
+            include_path = os.path.join(conda_prefix, "include")
+        anchor_point = os.path.dirname(include_path)
     return _find_based_on_ctk_layout(libname, h_basename, anchor_point)
 
 
 def _find_ctk_header_directory(libname: str) -> Optional[str]:
     h_basename = supported_nvidia_headers.SUPPORTED_HEADERS_CTK[libname]
     candidate_dirs = supported_nvidia_headers.SUPPORTED_SITE_PACKAGE_HEADER_DIRS_CTK[libname]
 
-    # Installed from a wheel
     for cdir in candidate_dirs:
-        hdr_dir: str  # help mypy
-        for hdr_dir in find_sub_dirs_all_sitepackages(tuple(cdir.split("/"))):
-            if _joined_isfile(hdr_dir, h_basename):
-                return hdr_dir
+        if hdr_dir := _find_under_site_packages(cdir, h_basename):
+            return hdr_dir
 
-    conda_prefix = os.environ.get("CONDA_PREFIX")
-    if conda_prefix:  # noqa: SIM102
-        if result := _find_based_on_conda_layout(libname, h_basename, conda_prefix):
-            return result
+    if hdr_dir := _find_based_on_conda_layout(libname, h_basename, True):
+        return hdr_dir
 
     cuda_home = get_cuda_home_or_path()
     if cuda_home:  # noqa: SIM102
@@ -132,19 +125,28 @@ def find_nvidia_header_directory(libname: str) -> Optional[str]:
         3. **CUDA Toolkit environment variables**
 
            - Use ``CUDA_HOME`` or ``CUDA_PATH`` (in that order).
-
-    Notes:
-        - The ``SUPPORTED_HEADERS_CTK`` dictionary maps each supported CUDA Toolkit
-          (CTK) library to the name of its canonical header (e.g., ``"cublas" →
-          "cublas.h"``). This is used to verify that the located directory is valid.
-
-        - The only supported non-CTK library at present is ``nvshmem``.
     """
 
-    if libname == "nvshmem":
-        return _abs_norm(_find_nvshmem_header_directory())
-
     if libname in supported_nvidia_headers.SUPPORTED_HEADERS_CTK:
         return _abs_norm(_find_ctk_header_directory(libname))
 
-    raise RuntimeError(f"UNKNOWN {libname=}")
+    h_basename = supported_nvidia_headers.SUPPORTED_HEADERS_NON_CTK.get(libname)
+    if h_basename is None:
+        raise RuntimeError(f"UNKNOWN {libname=}")
+
+    candidate_dirs = supported_nvidia_headers.SUPPORTED_SITE_PACKAGE_HEADER_DIRS_NON_CTK.get(libname, [])
+    hdr_dir: Optional[str]  # help mypy
+    for cdir in candidate_dirs:
+        if hdr_dir := _find_under_site_packages(cdir, h_basename):
+            return _abs_norm(hdr_dir)
+
+    if hdr_dir := _find_based_on_conda_layout(libname, h_basename, False):
+        return _abs_norm(hdr_dir)
+
+    candidate_dirs = supported_nvidia_headers.SUPPORTED_INSTALL_DIRS_NON_CTK.get(libname, [])
+    for cdir in candidate_dirs:
+        for hdr_dir in sorted(glob.glob(cdir), reverse=True):
+            if _joined_isfile(hdr_dir, h_basename):
+                return _abs_norm(hdr_dir)
+
+    return None
diff --git a/cuda_pathfinder/cuda/pathfinder/_headers/supported_nvidia_headers.py b/cuda_pathfinder/cuda/pathfinder/_headers/supported_nvidia_headers.py
@@ -57,3 +57,28 @@
     "nvrtc": ("nvidia/cu13/include", "nvidia/cuda_nvrtc/include"),
     "nvvm": ("nvidia/cu13/include", "nvidia/cuda_nvcc/nvvm/include"),
 }
+
+SUPPORTED_HEADERS_NON_CTK_COMMON = {
+    "cutensor": "cutensor.h",
+}
+SUPPORTED_HEADERS_NON_CTK_LINUX_ONLY = {
+    "nvshmem": "nvshmem.h",
+}
+SUPPORTED_HEADERS_NON_CTK_WINDOWS_ONLY: Final[dict[str, str]] = {}
+SUPPORTED_HEADERS_NON_CTK_LINUX = SUPPORTED_HEADERS_NON_CTK_COMMON | SUPPORTED_HEADERS_NON_CTK_LINUX_ONLY
+SUPPORTED_HEADERS_NON_CTK_WINDOWS = SUPPORTED_HEADERS_NON_CTK_COMMON | SUPPORTED_HEADERS_NON_CTK_WINDOWS_ONLY
+SUPPORTED_HEADERS_NON_CTK_ALL = (
+    SUPPORTED_HEADERS_NON_CTK_COMMON | SUPPORTED_HEADERS_NON_CTK_LINUX_ONLY | SUPPORTED_HEADERS_NON_CTK_WINDOWS_ONLY
+)
+SUPPORTED_HEADERS_NON_CTK: Final[dict[str, str]] = (
+    SUPPORTED_HEADERS_NON_CTK_WINDOWS if IS_WINDOWS else SUPPORTED_HEADERS_NON_CTK_LINUX
+)
+
+SUPPORTED_SITE_PACKAGE_HEADER_DIRS_NON_CTK = {
+    "cutensor": ("cutensor/include",),
+    "nvshmem": ("nvidia/nvshmem/include",),
+}
+
+SUPPORTED_INSTALL_DIRS_NON_CTK = {
+    "nvshmem": ("/usr/include/nvshmem_*",),
+}
diff --git a/cuda_pathfinder/cuda/pathfinder/_version.py b/cuda_pathfinder/cuda/pathfinder/_version.py
@@ -1,4 +1,4 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
-__version__ = "1.3.1"
+__version__ = "1.3.2"
diff --git a/cuda_pathfinder/docs/nv-versions.json b/cuda_pathfinder/docs/nv-versions.json
@@ -3,6 +3,10 @@
         "version": "latest",
         "url": "https://nvidia.github.io/cuda-python/cuda-pathfinder/latest/"
     },
+    {
+        "version": "1.3.2",
+        "url": "https://nvidia.github.io/cuda-python/cuda-pathfinder/1.3.2/"
+    },
     {
         "version": "1.3.1",
         "url": "https://nvidia.github.io/cuda-python/cuda-pathfinder/1.3.1/"
diff --git a/cuda_pathfinder/docs/source/api.rst b/cuda_pathfinder/docs/source/api.rst
@@ -18,4 +18,5 @@ and experimental APIs for locating NVIDIA C/C++ header directories.
    DynamicLibNotFoundError
 
    SUPPORTED_HEADERS_CTK
+   SUPPORTED_HEADERS_NON_CTK
    find_nvidia_header_directory
diff --git a/cuda_pathfinder/docs/source/release/1.3.2-notes.rst b/cuda_pathfinder/docs/source/release/1.3.2-notes.rst
@@ -0,0 +1,15 @@
+.. SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+.. SPDX-License-Identifier: Apache-2.0
+
+.. py:currentmodule:: cuda.pathfinder
+
+``cuda-pathfinder`` 1.3.2 Release notes
+=======================================
+
+Released on Oct 29, 2025
+
+Highlights
+----------
+
+* Add cuTENSOR support and fix bugs discovered while working on conda testing
+  (`PR #1194 <https://github.com/NVIDIA/cuda-python/pull/1194>`_)
diff --git a/cuda_pathfinder/pyproject.toml b/cuda_pathfinder/pyproject.toml
@@ -18,6 +18,7 @@ test = [
 cu12 = [
     "cuda-toolkit[nvcc,cublas,nvrtc,cudart,cufft,curand,cusolver,cusparse,npp,nvfatbin,nvjitlink,nvjpeg,cccl]==12.*",
     "cuda-toolkit[cufile]==12.*; sys_platform != 'win32'",
+    "cutensor-cu12",
     "nvidia-cublasmp-cu12; sys_platform != 'win32'",
     "nvidia-cudss-cu12",
     "nvidia-cufftmp-cu12; sys_platform != 'win32'",
@@ -28,6 +29,7 @@ cu12 = [
 cu13 = [
     "cuda-toolkit[nvcc,cublas,nvrtc,cudart,cufft,curand,cusolver,cusparse,npp,nvfatbin,nvjitlink,nvjpeg,cccl,nvvm]==13.*",
     "cuda-toolkit[cufile]==13.*; sys_platform != 'win32'",
+    "cutensor-cu13",
     "nvidia-cublasmp-cu13; sys_platform != 'win32'",
     "nvidia-cudss-cu13",
     "nvidia-nccl-cu13; sys_platform != 'win32'",
diff --git a/cuda_pathfinder/tests/local_helpers.py b/cuda_pathfinder/tests/local_helpers.py
@@ -0,0 +1,16 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+import functools
+import importlib.metadata
+import re
+
+
+@functools.cache  # memoized per name_pattern: the scan of installed distributions runs once per distinct pattern
+def have_distribution(name_pattern: str) -> bool:  # True if any installed distribution's Name matches the regex
+    re_name_pattern = re.compile(name_pattern)
+    return any(  # re.match anchors only at the start of the name; add "$" to the pattern for an exact match
+        re_name_pattern.match(dist.metadata["Name"])
+        for dist in importlib.metadata.distributions()
+        if "Name" in dist.metadata  # skip distributions whose metadata lacks a Name field
+    )
diff --git a/cuda_pathfinder/tests/test_find_nvidia_headers.py b/cuda_pathfinder/tests/test_find_nvidia_headers.py
diff --git a/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py b/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py
diff --git a/toolshed/conda_create_for_pathfinder_testing.ps1 b/toolshed/conda_create_for_pathfinder_testing.ps1
diff --git a/toolshed/conda_create_for_pathfinder_testing.sh b/toolshed/conda_create_for_pathfinder_testing.sh