diff --git a/.github/actions/fetch_ctk/action.yml b/.github/actions/fetch_ctk/action.yml
index ed0b6d290..403cb423b 100644
--- a/.github/actions/fetch_ctk/action.yml
+++ b/.github/actions/fetch_ctk/action.yml
@@ -17,7 +17,7 @@ inputs:
     description: "A list of the CTK components to install as a comma-separated list. e.g. 'cuda_nvcc,cuda_nvrtc,cuda_cudart'"
     required: false
     type: string
-    default: "cuda_nvcc,cuda_cudart,cuda_nvrtc,cuda_profiler_api,cuda_cccl,cuda_sanitizer_api,libnvjitlink"
+    default: "cuda_nvcc,cuda_cudart,cuda_nvrtc,cuda_profiler_api,cuda_cccl,libnvjitlink"
 
 runs:
   using: composite
@@ -50,11 +50,15 @@ runs:
       if: ${{ steps.ctk-get-cache.outputs.cache-hit != 'true' }}
       shell: bash --noprofile --norc -xeuo pipefail {0}
       run: |
-        CUDA_PATH="./cuda_toolkit"
-        mkdir $CUDA_PATH
+        # Everything under this folder is packed and stored in the GitHub Cache space,
+        # and unpacked after retrieving from the cache.
+        CACHE_TMP_DIR="./cache_tmp_dir"
+        rm -rf $CACHE_TMP_DIR
+        mkdir $CACHE_TMP_DIR
 
         # The binary archives (redist) are guaranteed to be updated as part of the release posting.
         CTK_BASE_URL="https://developer.download.nvidia.com/compute/cuda/redist/"
+        CTK_JSON_URL="$CTK_BASE_URL/redistrib_${{ inputs.cuda-version }}.json"
         if [[ "${{ inputs.host-platform }}" == linux* ]]; then
           if [[ "${{ inputs.host-platform }}" == "linux-64" ]]; then
             CTK_SUBDIR="linux-x86_64"
@@ -62,26 +66,24 @@ runs:
             CTK_SUBDIR="linux-sbsa"
           fi
           function extract() {
-            tar -xvf $1 -C $CUDA_PATH --strip-components=1
+            tar -xvf $1 -C $CACHE_TMP_DIR --strip-components=1
           }
         elif [[ "${{ inputs.host-platform }}" == "win-64" ]]; then
           CTK_SUBDIR="windows-x86_64"
           function extract() {
             _TEMP_DIR_=$(mktemp -d)
             unzip $1 -d $_TEMP_DIR_
-            cp -r $_TEMP_DIR_/*/* $CUDA_PATH
+            cp -r $_TEMP_DIR_/*/* $CACHE_TMP_DIR
             rm -rf $_TEMP_DIR_
           }
         fi
-
         function populate_cuda_path() {
           # take the component name as a argument
           function download() {
             curl -kLSs $1 -o $2
           }
-          local CTK_COMPONENT=$1
-          local CTK_VERSION=$2
-          CTK_COMPONENT_REL_PATH="$(curl -s ${CTK_BASE_URL}/redistrib_${CTK_VERSION}.json |
+          CTK_COMPONENT=$1
+          CTK_COMPONENT_REL_PATH="$(curl -s $CTK_JSON_URL |
             python -c "import sys, json; print(json.load(sys.stdin)['${CTK_COMPONENT}']['${CTK_SUBDIR}']['relative_path'])")"
           CTK_COMPONENT_URL="${CTK_BASE_URL}/${CTK_COMPONENT_REL_PATH}"
           CTK_COMPONENT_COMPONENT_FILENAME="$(basename $CTK_COMPONENT_REL_PATH)"
@@ -98,23 +100,23 @@ runs:
         CTK_CACHE_COMPONENTS="${CTK_CACHE_COMPONENTS//,,/,}"
         # Get headers and shared libraries in place
         for item in $(echo $CTK_CACHE_COMPONENTS | tr ',' ' '); do
-          ctk_version="${{ inputs.cuda-version }}"
-          if [[ "$item" == "cuda_sanitizer_api" ]]; then
-            # Always use latest CTK for cuda_sanitizer_api
-            # FIXME: Automatically track latest CTK version
-            CUDA_MAJOR="$(cut -d '.' -f 1 <<< ${{ inputs.cuda-version }})"
-            if [[ "$CUDA_MAJOR" == "12" ]]; then
-              # TODO: Automatically track latest CTK minor version
-              ctk_version="12.8.0"
-            fi
-          fi
-          populate_cuda_path "$item" "$ctk_version"
+          populate_cuda_path "$item"
         done
-        ls -l $CUDA_PATH
+        ls -l $CACHE_TMP_DIR
 
         # Prepare the cache
         # Note: try to escape | and > ...
-        tar -czvf ${CTK_CACHE_FILENAME} ${CUDA_PATH}
+        tar -czvf ${CTK_CACHE_FILENAME} ${CACHE_TMP_DIR}
+
+        # "Move" files from temp dir to CUDA_PATH
+        CUDA_PATH="./cuda_toolkit"
+        mkdir -p $CUDA_PATH
+        # Unfortunately we cannot use "rsync -av $CACHE_TMP_DIR/ $CUDA_PATH" because
+        # not all runners have rsync pre-installed (or even installable, such as
+        # Git Bash). We do it in the dumb way.
+        cp -r $CACHE_TMP_DIR/* $CUDA_PATH
+        rm -rf $CACHE_TMP_DIR
+        ls -l $CUDA_PATH
 
     - name: Upload CTK cache
       if: ${{ always() &&
@@ -129,8 +131,13 @@ runs:
       shell: bash --noprofile --norc -xeuo pipefail {0}
       run: |
         ls -l
+        CACHE_TMP_DIR="./cache_tmp_dir"
         CUDA_PATH="./cuda_toolkit"
+        mkdir -p $CUDA_PATH
         tar -xzvf $CTK_CACHE_FILENAME
+        # Can't use rsync here, see above
+        cp -r $CACHE_TMP_DIR/* $CUDA_PATH
+        rm -rf $CACHE_TMP_DIR $CTK_CACHE_FILENAME
         ls -l $CUDA_PATH
         if [ ! -d "$CUDA_PATH/include" ]; then
           exit 1
diff --git a/.github/workflows/guess_latest.sh b/.github/workflows/guess_latest.sh
new file mode 100644
index 000000000..dc4c4649a
--- /dev/null
+++ b/.github/workflows/guess_latest.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# Print the newest CTK version for which a redistrib_X.Y.Z.json manifest is
+# listed on the redist server. On failure, a diagnostic goes to stderr and the
+# exit status is non-zero, so a caller capturing stdout never receives an error
+# message in place of a version.
+
+set -euo pipefail
+
+# URL to search
+URL="https://developer.download.nvidia.com/compute/cuda/redist/"
+
+# Fetch the directory listing and extract the latest version number
+get_latest_version() {
+    # Declared separately from the assignments below: "local var=$(cmd)"
+    # would replace the exit status of cmd with that of "local".
+    local html_content versions
+
+    # Get the HTML content of the page
+    if ! html_content=$(wget -q -O - "$URL"); then
+        echo "Failed to fetch $URL" >&2
+        return 1
+    fi
+
+    # Extract the X.Y.Z part of every name matching redistrib_X.Y.Z.json.
+    # grep exits non-zero when nothing matches; that case is reported below.
+    versions=$(grep -oP 'redistrib_\K[0-9]+\.[0-9]+\.[0-9]+(?=\.json)' <<< "$html_content" || true)
+
+    if [ -z "$versions" ]; then
+        echo "No files matching the pattern were found." >&2
+        return 1
+    fi
+
+    # Sort the versions and get the latest
+    sort -V <<< "$versions" | tail -n 1
+}
+
+get_latest_version
diff --git a/.github/workflows/install_gpu_driver.ps1 b/.github/workflows/install_gpu_driver.ps1
index 980e64996..955a304db 100644
--- a/.github/workflows/install_gpu_driver.ps1
+++ b/.github/workflows/install_gpu_driver.ps1
@@ -1,4 +1,6 @@
-#Requires -RunAsAdministrator
+# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+#
+# SPDX-License-Identifier: Apache-2.0
 
 # Install the driver
 function Install-Driver {
@@ -23,7 +25,7 @@ function Install-Driver {
     $ProgressPreference = $ProgressPreference_tmp
     Write-Output 'Download complete!'
 
-    # Install the file with the specified path from earlier as well as the RunAs admin option
+    # Install the file with the specified path from earlier
     Write-Output 'Running the driver installer...'
     Start-Process -FilePath $file_dir -ArgumentList $install_args -Wait
     Write-Output 'Done!'
diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml
index 83dad5cec..2761f8c40 100644
--- a/.github/workflows/test-wheel-linux.yml
+++ b/.github/workflows/test-wheel-linux.yml
@@ -52,6 +52,14 @@ jobs:
         with:
           fetch-depth: 0
 
+      - name: Install dependencies
+        uses: ./.github/actions/install_unix_deps
+        continue-on-error: false
+        with:
+          # gcc for Cython tests, jq/wget for artifact fetching
+          dependencies: "build-essential jq wget"
+          dependent_exes: "gcc jq wget"
+
       - name: Set environment variables
         run: |
           PYTHON_VERSION_FORMATTED=$(echo '${{ inputs.python-version }}' | tr -d '.')
@@ -78,6 +86,18 @@ jobs:
             fi
           fi
 
+          # We don't test compute-sanitizer on CTK<12 because backporting fixes is too much effort
+          # We only test compute-sanitizer on python 3.12 arbitrarily; we don't need to use sanitizer on the entire matrix
+          # Only local ctk installs have compute-sanitizer; there is no wheel for it
+          if [[ "${{ inputs.python-version }}" == "3.12" && "${{ inputs.cuda-version }}" != "11.8.0" && "${{ inputs.local-ctk }}" == 1 ]]; then
+            SETUP_SANITIZER=1
+            LATEST_CUDA_VERSION="$(bash .github/workflows/guess_latest.sh)"
+            echo "LATEST_CUDA_VERSION=${LATEST_CUDA_VERSION}" >> $GITHUB_ENV
+          else
+            SETUP_SANITIZER=0
+          fi
+          echo "SETUP_SANITIZER=${SETUP_SANITIZER}" >> $GITHUB_ENV
+
           # make outputs from the previous job as env vars
           CUDA_CORE_ARTIFACT_BASENAME="cuda-core-python${PYTHON_VERSION_FORMATTED}-${{ inputs.host-platform }}"
           echo "PYTHON_VERSION_FORMATTED=${PYTHON_VERSION_FORMATTED}" >> $GITHUB_ENV
@@ -91,14 +111,6 @@ jobs:
           echo "SKIP_CUDA_BINDINGS_TEST=${SKIP_CUDA_BINDINGS_TEST}" >> $GITHUB_ENV
           echo "SKIP_CUDA_CORE_CYTHON_TEST=${SKIP_CUDA_CORE_CYTHON_TEST}" >> $GITHUB_ENV
 
-      - name: Install dependencies
-        uses: ./.github/actions/install_unix_deps
-        continue-on-error: false
-        with:
-          # gcc for Cython tests, jq/wget for artifact fetching
-          dependencies: "build-essential jq wget"
-          dependent_exes: "gcc jq wget"
-
       - name: Download cuda-python build artifacts
         if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0'}}
         uses: actions/download-artifact@v4
@@ -184,12 +196,18 @@ jobs:
           host-platform: ${{ inputs.host-platform }}
           cuda-version: ${{ inputs.cuda-version }}
 
+      - name: Set up latest cuda_sanitizer_api
+        if: ${{ env.SETUP_SANITIZER == '1' }}
+        uses: ./.github/actions/fetch_ctk
+        continue-on-error: false
+        with:
+          host-platform: ${{ inputs.host-platform }}
+          cuda-version: ${{ env.LATEST_CUDA_VERSION }}
+          cuda-components: "cuda_sanitizer_api"
+
       - name: Set up compute-sanitizer
         run: |
-          # We don't test compute-sanitizer on CTK<12 because backporting fixes is too much effort
-          # We only test compute-sanitizer on python 3.12 arbitrarily; we don't need to use sanitizer on the entire matrix
-          # Only local ctk installs have compute-sanitizer; there is not wheel for it
-          if [[ "${{ inputs.python-version }}" == "3.12" && "${{ inputs.cuda-version }}" != "11.8.0" && "${{ inputs.local-ctk }}" == 1 ]]; then
+          if [[ "${SETUP_SANITIZER}" == 1 ]]; then
             COMPUTE_SANITIZER="${CUDA_HOME}/bin/compute-sanitizer"
             COMPUTE_SANITIZER_VERSION=$(${COMPUTE_SANITIZER} --version | grep -Eo "[0-9]{4}\.[0-9]\.[0-9]" | sed -e 's/\.//g')
             SANITIZER_CMD="${COMPUTE_SANITIZER} --target-processes=all --launch-timeout=0 --tool=memcheck --error-exitcode=1"