Add FA3 #4166 (Merged)

.github/workflows/test_docker.yml (1 addition, 0 deletions)

@@ -56,6 +56,7 @@ jobs:
           docker images
           docker run --rm lmdeploy:latest lmdeploy check_env
       - name: Dive
+        if: ${{ matrix.cuda_version == 'cu12' }}
         uses: MaxymVlasov/[email protected]
         with:
           image: lmdeploy:latest
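The added `if:` gates the Dive image-size analysis to the cu12 matrix entry only. For reference, the same analysis can be run locally against the built image; this is a sketch using the upstream dive container (an assumption — the CI step uses the MaxymVlasov action instead):

    # Illustrative local equivalent of the gated CI step (cu12 image only).
    docker run --rm -v /var/run/docker.sock:/var/run/docker.sock \
        wagoodman/dive:latest lmdeploy:latest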
docker/install.sh (14 additions, 6 deletions)

@@ -25,11 +25,11 @@ popd >/dev/null
 if [[ "${CUDA_VERSION_SHORT}" = "cu118" ]]; then
     apt-get install -y --no-install-recommends cuda-minimal-build-11-8
 elif [[ "${CUDA_VERSION_SHORT}" = "cu124" ]]; then
-    apt-get install -y --no-install-recommends cuda-minimal-build-12-4 dkms
+    apt-get install -y --no-install-recommends cuda-minimal-build-12-4 numactl dkms
 elif [[ "${CUDA_VERSION_SHORT}" = "cu128" ]]; then
-    apt-get install -y --no-install-recommends cuda-minimal-build-12-8 dkms
+    apt-get install -y --no-install-recommends cuda-minimal-build-12-8 numactl dkms
 elif [[ "${CUDA_VERSION_SHORT}" = "cu130" ]]; then
-    apt-get install -y --no-install-recommends cuda-minimal-build-13-0 dkms
+    apt-get install -y --no-install-recommends cuda-minimal-build-13-0 numactl dkms
 fi
 
 apt-get clean -y
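The PR does not say why numactl is added; presumably it enables NUMA-aware process placement when serving on multi-socket hosts. A minimal illustration of how it might be used at runtime (not part of this PR; the model path is hypothetical):

    # Illustrative only: pin the server to NUMA node 0's CPUs and memory.
    numactl --cpunodebind=0 --membind=0 \
        lmdeploy serve api_server /models/internlm2-chat-7b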
@@ -66,12 +66,20 @@ fi
 pip install torch${TORCH_VERSION} --extra-index-url https://download.pytorch.org/whl/${CUDA_VERSION_SHORT}
 pip install /wheels/*.whl
 
-
 if [[ "${CUDA_VERSION_SHORT}" != "cu118" ]] && [[ "${PYTHON_VERSION}" != "3.9" ]]; then
-    pip install cuda-python dlblas==0.0.6
+    pip install cuda-python dlblas==0.0.6 dlslime==0.0.1.post10
 fi
 
+# install pre-built flash attention 3 wheel
+if [[ "${CUDA_VERSION_SHORT}" = "cu128" ]]; then
+    FA3_WHEELS_URL="https://windreamer.github.io/flash-attention3-wheels/cu128_torch280"
+    pip install flash_attn_3 --find-links ${FA3_WHEELS_URL} --extra-index-url https://download.pytorch.org/whl/cu128
+elif [[ "${CUDA_VERSION_SHORT}" = "cu130" ]]; then
+    FA3_WHEELS_URL="https://windreamer.github.io/flash-attention3-wheels/cu130_torch290"
+    pip install flash_attn_3 --find-links ${FA3_WHEELS_URL} --extra-index-url https://download.pytorch.org/whl/cu130
+fi
+
-# install pre-compiled flash attention wheel
+# install pre-built flash attention wheel
 PLATFORM="linux_x86_64"
 PY_VERSION=$(python3 - <<'PY'
 import torch, sys
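Since the FA3 wheel index is pinned per CUDA/torch combination, a quick post-install smoke test can confirm the wheel actually resolved. A sketch, not part of this PR; the flash_attn_interface module name is an assumption based on upstream flash-attention 3:

    # Hypothetical sanity check after the FA3 install above.
    if [[ "${CUDA_VERSION_SHORT}" = "cu128" || "${CUDA_VERSION_SHORT}" = "cu130" ]]; then
        pip show flash_attn_3 >/dev/null || echo "flash_attn_3 wheel missing" >&2
        python3 -c "import flash_attn_interface" || echo "FA3 import failed" >&2
    fi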
docker/prepare_wheel.sh (0 additions, 1 deletion)

@@ -23,7 +23,6 @@ if [[ ${PYTHON_VERSION} = "3.13" ]]; then
 fi
 
 if [[ "${CUDA_VERSION_SHORT}" != "cu118" ]]; then
-
     GDRCOPY_VERSION=2.5.1
     DEEP_EP_VERSION=9af0e0d # v1.2.1
     DEEP_GEMM_VERSION=c9f8b34 # v2.1.1.post3