diff --git a/.github/workflows/test_docker.yml b/.github/workflows/test_docker.yml
index 4d33d96ad4..67fe620efc 100644
--- a/.github/workflows/test_docker.yml
+++ b/.github/workflows/test_docker.yml
@@ -56,6 +56,7 @@ jobs:
           docker images
           docker run --rm lmdeploy:latest lmdeploy check_env
       - name: Dive
+        if: ${{ matrix.cuda_version == 'cu12' }}
         uses: MaxymVlasov/dive-action@v1.5.0
         with:
           image: lmdeploy:latest
diff --git a/docker/install.sh b/docker/install.sh
index 54d252e30d..43cae34b2f 100755
--- a/docker/install.sh
+++ b/docker/install.sh
@@ -25,11 +25,11 @@ popd >/dev/null
 if [[ "${CUDA_VERSION_SHORT}" = "cu118" ]]; then
     apt-get install -y --no-install-recommends cuda-minimal-build-11-8
 elif [[ "${CUDA_VERSION_SHORT}" = "cu124" ]]; then
-    apt-get install -y --no-install-recommends cuda-minimal-build-12-4 dkms
+    apt-get install -y --no-install-recommends cuda-minimal-build-12-4 numactl dkms
 elif [[ "${CUDA_VERSION_SHORT}" = "cu128" ]]; then
-    apt-get install -y --no-install-recommends cuda-minimal-build-12-8 dkms
+    apt-get install -y --no-install-recommends cuda-minimal-build-12-8 numactl dkms
 elif [[ "${CUDA_VERSION_SHORT}" = "cu130" ]]; then
-    apt-get install -y --no-install-recommends cuda-minimal-build-13-0 dkms
+    apt-get install -y --no-install-recommends cuda-minimal-build-13-0 numactl dkms
 fi
 
 apt-get clean -y
@@ -66,12 +66,20 @@ fi
 
 pip install torch${TORCH_VERSION} --extra-index-url https://download.pytorch.org/whl/${CUDA_VERSION_SHORT}
 pip install /wheels/*.whl
-
 if [[ "${CUDA_VERSION_SHORT}" != "cu118" ]] && [[ "${PYTHON_VERSION}" != "3.9" ]]; then
-    pip install cuda-python dlblas==0.0.6
+    pip install cuda-python dlblas==0.0.6 dlslime==0.0.1.post10
+fi
+
+# install pre-built flash attention 3 wheel
+if [[ "${CUDA_VERSION_SHORT}" = "cu128" ]]; then
+    FA3_WHEELS_URL="https://windreamer.github.io/flash-attention3-wheels/cu128_torch280"
+    pip install flash_attn_3 --find-links ${FA3_WHEELS_URL} --extra-index-url https://download.pytorch.org/whl/cu128
+elif [[ "${CUDA_VERSION_SHORT}" = "cu130" ]]; then
+    FA3_WHEELS_URL="https://windreamer.github.io/flash-attention3-wheels/cu130_torch290"
+    pip install flash_attn_3 --find-links ${FA3_WHEELS_URL} --extra-index-url https://download.pytorch.org/whl/cu130
 fi
 
-# install pre-compiled flash attention wheel
+# install pre-built flash attention wheel
 PLATFORM="linux_x86_64"
 PY_VERSION=$(python3 - <<'PY'
 import torch, sys
diff --git a/docker/prepare_wheel.sh b/docker/prepare_wheel.sh
index 9b7db69484..6a559e2f2b 100755
--- a/docker/prepare_wheel.sh
+++ b/docker/prepare_wheel.sh
@@ -23,7 +23,6 @@ if [[ ${PYTHON_VERSION} = "3.13" ]]; then
 fi
 
 if [[ "${CUDA_VERSION_SHORT}" != "cu118" ]]; then
-    GDRCOPY_VERSION=2.5.1
     DEEP_EP_VERSION=9af0e0d # v1.2.1
     DEEP_GEMM_VERSION=c9f8b34 # v2.1.1.post3