Commit a4e9335

huydhn authored and ywang96 committed
Install pre-built xformers-0.0.32.post2 built with pt-2.9.0 (vllm-project#27598)
Signed-off-by: Huy Do <[email protected]>
Co-authored-by: Roger Wang <[email protected]>
1 parent a54507b commit a4e9335

2 files changed: +1 -8 lines changed

docker/Dockerfile

Lines changed: 0 additions & 7 deletions
@@ -361,13 +361,6 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist
     && uv pip install --system dist/*.whl --verbose \
         --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
 
-# TODO (huydhn): Remove this once xformers is released for 2.9.0
-RUN --mount=type=cache,target=/root/.cache/uv bash - <<'BASH'
-    . /etc/environment
-    export TORCH_CUDA_ARCH_LIST='7.5 8.0+PTX 9.0a'
-    uv pip install --system --no-build-isolation "git+https://github.com/facebookresearch/[email protected]"
-BASH
-
 # Install FlashInfer pre-compiled kernel cache and binaries
 # https://docs.flashinfer.ai/installation.html
 RUN --mount=type=cache,target=/root/.cache/uv \
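
With the source build removed, the image gets xformers entirely from the wheel pinned in requirements/cuda.txt. A minimal sanity check (not part of this commit; it assumes python and the installed wheels are on the default path inside the built image):

    # Sketch only: confirm the torch and xformers versions picked up in the image.
    python -c "import torch, xformers; print(torch.__version__, xformers.__version__)"
    # Expected to report torch 2.9.0 and the xformers build pinned below,
    # e.g. 0.0.33+5d4b92a5.d20251026.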

requirements/cuda.txt

Lines changed: 1 addition & 1 deletion
@@ -10,7 +10,7 @@ torchaudio==2.9.0
 # These must be updated alongside torch
 torchvision==0.24.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
 # https://github.com/facebookresearch/xformers/releases/tag/v0.0.32.post1
-# xformers==0.0.32.post1; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch >= 2.8
+xformers==0.0.33+5d4b92a5.d20251026; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch >= 2.9
 # FlashInfer should be updated together with the Dockerfile
 flashinfer-python==0.4.1
 # Triton Kernels are needed for mxfp4 fused moe. (Should be updated alongside torch)
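
For reference, these pins are consumed the same way the Dockerfile installs them. A sketch under assumptions: PYTORCH_CUDA_INDEX_BASE_URL and CUDA_VERSION are set as in the image build, and the pinned pre-built xformers wheel is resolvable from the configured index.

    # Sketch only (mirrors the Dockerfile's uv invocation; not a command from this commit).
    uv pip install --system -r requirements/cuda.txt \
        --extra-index-url "${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo "$CUDA_VERSION" | cut -d. -f1,2 | tr -d '.')"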

0 commit comments
