Changes from all commits (48 commits)
e738cba
Update PyTorch to 2.9.0
huydhn Sep 16, 2025
497cffd
Add a comment
huydhn Sep 16, 2025
89ba43a
Not setting --extra-index-url in test.in
huydhn Sep 16, 2025
157aae3
Use https://download.pytorch.org/whl/test
huydhn Sep 17, 2025
ea1ef7a
Merge branch 'main' into pytorch-2.9.0
huydhn Sep 17, 2025
0a81fb2
Put torchao back to the same state
huydhn Sep 17, 2025
0325966
Install the latest torchao nightly for quantization test
huydhn Sep 17, 2025
39b9cbf
Debug distributed failures
huydhn Sep 17, 2025
0272040
Wrong torchao package
huydhn Sep 18, 2025
c2e0eaf
Attempt the fix in https://github.com/NVIDIA/nccl/issues/1838
huydhn Sep 18, 2025
c16db74
Merge branch 'main' into pytorch-2.9.0
huydhn Sep 23, 2025
d3436a8
Set inductor_graph_partition to True by default
huydhn Sep 23, 2025
0e581a3
Rerun with RC3
huydhn Sep 23, 2025
84c6cc3
Rerun with RC4
huydhn Sep 24, 2025
3637adb
Merge branch 'main' into pytorch-2.9.0
huydhn Sep 30, 2025
23c6427
Build CPU docker image
huydhn Sep 30, 2025
ba8a85f
Leave CPU for later
huydhn Sep 30, 2025
ec7b5c4
CPU build should work now
huydhn Sep 30, 2025
869d13e
Rebuild flashinfer-python for 2.9.0
huydhn Oct 1, 2025
a670c2e
Merge branch 'main' into pytorch-2.9.0
huydhn Oct 1, 2025
145e225
Fix precommit
huydhn Oct 1, 2025
9cd7683
Merge branch 'main' into pytorch-2.9.0
huydhn Oct 2, 2025
47ae5d8
Merge branch 'main' into pytorch-2.9.0
huydhn Oct 2, 2025
e7064b4
Merge branch 'main' into pytorch-2.9.0
huydhn Oct 3, 2025
106bd40
Merge branch 'main' into pytorch-2.9.0
huydhn Oct 5, 2025
76e438d
Merge branch 'main' into pytorch-2.9.0
huydhn Oct 8, 2025
ebaa419
Merge branch 'main' into pytorch-2.9.0
huydhn Oct 9, 2025
b4ed78c
Merge branch 'main' into pytorch-2.9.0
huydhn Oct 11, 2025
5d50c59
Skip some test unless it's B200
huydhn Oct 11, 2025
210aa68
Merge branch 'main' into pytorch-2.9.0
huydhn Oct 14, 2025
39cd1b2
Merge branch 'main' into pytorch-2.9.0
huydhn Oct 15, 2025
22f1d3e
Fix ao issue and use official 2.9.0 wheels
huydhn Oct 16, 2025
d1eb57b
Merge branch 'main' into pytorch-2.9.0
huydhn Oct 16, 2025
e4d4655
Fix some more places
huydhn Oct 16, 2025
c7820e6
Tweak some OOM failures
huydhn Oct 16, 2025
4b09de2
Use cu129
huydhn Oct 17, 2025
4edb3b5
Merge branch 'main' into pytorch-2.9.0
huydhn Oct 17, 2025
f659247
Update the CI image to use 12.9.1
huydhn Oct 17, 2025
a9398a9
Fix precommit and clean up flashinfer section
huydhn Oct 17, 2025
67f9ab9
Merge remote-tracking branch 'upstream/main' into pytorch-2.9.0
ProExpertProg Oct 17, 2025
fc8a49a
Enable `use_inductor_graph_partition` by default in >=2.9
ProExpertProg Oct 2, 2025
f6b27e6
Turn standalone compile back on
ProExpertProg Oct 4, 2025
f2bde49
Skip E2E test for inductor-partition + fp4
ProExpertProg Oct 17, 2025
8552681
[cherry-pick] [compile] Enable sequence parallelism matching w/o cust…
angelayi Oct 15, 2025
4ac901e
refactor e2e test to use Matches object
ProExpertProg Oct 16, 2025
00d63e9
Add dynamic shape to seqpar tests
ProExpertProg Oct 16, 2025
101f75d
Add E2E test for SeqPar+asyncTP (TODO rms_norm still needed)
ProExpertProg Oct 16, 2025
9dadf9a
Skip E2E test for inductor-partition + fp4
ProExpertProg Oct 17, 2025
9 changes: 7 additions & 2 deletions .buildkite/test-pipeline.yaml
@@ -172,6 +172,8 @@ steps:
- tests/v1/engine/test_engine_core_client.py
- tests/distributed/test_symm_mem_allreduce.py
commands:
# https://github.com/NVIDIA/nccl/issues/1838
- export NCCL_CUMEM_HOST_ENABLE=0
# test with torchrun tp=2 and external_dp=2
- torchrun --nproc-per-node=4 distributed/test_torchrun_example.py
# test with torchrun tp=2 and pp=2
@@ -349,7 +351,8 @@ steps:
- python3 offline_inference/basic/embed.py
- python3 offline_inference/basic/score.py
- python3 offline_inference/spec_decode.py --test --method eagle --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 2048
- python3 offline_inference/spec_decode.py --test --method eagle3 --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 2048
# https://github.com/vllm-project/vllm/pull/26682 uses slightly more memory in PyTorch 2.9+ causing this test to OOM in 1xL4 GPU
- python3 offline_inference/spec_decode.py --test --method eagle3 --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 1536

- label: Platform Tests (CUDA) # 4min
timeout_in_minutes: 15
@@ -529,7 +532,7 @@ steps:
# https://github.com/pytorch/ao/issues/2919, we'll have to skip new torchao tests for now
# we can only upgrade after this is resolved
# TODO(jerryzh168): resolve the above comment
- uv pip install --system torchao==0.13.0
- uv pip install --system torchao==0.13.0 --index-url https://download.pytorch.org/whl/cu129
- VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization/ --ignore quantization/test_blackwell_moe.py

- label: LM Eval Small Models # 53min
@@ -970,6 +973,8 @@ steps:
- tests/v1/shutdown
- tests/v1/worker/test_worker_memory_snapshot.py
commands:
# https://github.com/NVIDIA/nccl/issues/1838
- export NCCL_CUMEM_HOST_ENABLE=0
- TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_async_llm_dp.py
- TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_external_lb_dp.py
- DP_SIZE=2 pytest -v -s v1/entrypoints/openai/test_multi_api_servers.py
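Reviewer note: the new `NCCL_CUMEM_HOST_ENABLE=0` lines are a workaround for https://github.com/NVIDIA/nccl/issues/1838, which showed up while debugging the distributed failures in this PR. To reproduce (or verify the fix) outside CI, the variable just needs to be exported before the same test commands; a minimal sketch, assuming a 4-GPU host with the test requirements installed:

```bash
# Workaround for https://github.com/NVIDIA/nccl/issues/1838 (disable NCCL cuMem host allocations)
export NCCL_CUMEM_HOST_ENABLE=0

# Same invocations the pipeline runs above
torchrun --nproc-per-node=4 distributed/test_torchrun_example.py
TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_async_llm_dp.py
```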
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -38,7 +38,7 @@ repos:
rev: 0.9.1
hooks:
- id: pip-compile
args: [requirements/test.in, -o, requirements/test.txt, --index-strategy, unsafe-best-match, --torch-backend, cu128, --python-platform, x86_64-manylinux_2_28]
args: [requirements/test.in, -o, requirements/test.txt, --index-strategy, unsafe-best-match, --extra-index-url, https://download.pytorch.org/whl/cu129, --python-platform, x86_64-manylinux_2_28]
files: ^requirements/test\.(in|txt)$
- repo: local
hooks:
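With `--torch-backend cu128` replaced by an explicit cu129 extra index, `requirements/test.txt` is regenerated through this hook whenever `test.in` changes. A sketch of the equivalent commands, assuming `pre-commit` and `uv` are installed locally:

```bash
# Run the lockfile hook defined above
pre-commit run pip-compile --all-files

# Or invoke uv directly with the same arguments the hook now uses
uv pip compile requirements/test.in -o requirements/test.txt \
  --index-strategy unsafe-best-match \
  --extra-index-url https://download.pytorch.org/whl/cu129 \
  --python-platform x86_64-manylinux_2_28
```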
4 changes: 2 additions & 2 deletions CMakeLists.txt
@@ -49,8 +49,8 @@ set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1
# requirements.txt files and should be kept consistent. The ROCm torch
# versions are derived from docker/Dockerfile.rocm
#
set(TORCH_SUPPORTED_VERSION_CUDA "2.8.0")
set(TORCH_SUPPORTED_VERSION_ROCM "2.8.0")
set(TORCH_SUPPORTED_VERSION_CUDA "2.9.0")
set(TORCH_SUPPORTED_VERSION_ROCM "2.9.0")

#
# Try to find python package with an executable that exactly matches
15 changes: 12 additions & 3 deletions docker/Dockerfile
@@ -5,7 +5,7 @@
# docs/contributing/dockerfile/dockerfile.md and
# docs/assets/contributing/dockerfile-stages-dependency.png

ARG CUDA_VERSION=12.8.1
ARG CUDA_VERSION=12.9.1
ARG PYTHON_VERSION=3.12

# By parameterizing the base images, we allow third-party to use their own
@@ -273,7 +273,7 @@ WORKDIR /vllm-workspace
ENV DEBIAN_FRONTEND=noninteractive
ARG TARGETPLATFORM

ARG GDRCOPY_CUDA_VERSION=12.8
ARG GDRCOPY_CUDA_VERSION=12.9
# Keep in line with FINAL_BASE_IMAGE
ARG GDRCOPY_OS_VERSION=Ubuntu22_04

@@ -356,6 +356,13 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist
uv pip install --system dist/*.whl --verbose \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')

# TODO (huydhn): Remove this once xformers is released for 2.9.0
RUN --mount=type=cache,target=/root/.cache/uv bash - <<'BASH'
. /etc/environment
export TORCH_CUDA_ARCH_LIST='7.5 8.0+PTX 9.0a'
uv pip install --system --no-build-isolation "git+https://github.com/facebookresearch/[email protected]"
BASH

# Install FlashInfer pre-compiled kernel cache and binaries
# https://docs.flashinfer.ai/installation.html
RUN --mount=type=cache,target=/root/.cache/uv \
@@ -422,6 +429,7 @@ ARG PYTHON_VERSION

ARG PIP_INDEX_URL UV_INDEX_URL
ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
ARG PYTORCH_CUDA_INDEX_BASE_URL

# This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
# Reference: https://github.com/astral-sh/uv/pull/1694
@@ -434,7 +442,8 @@ ENV UV_LINK_MODE=copy
RUN --mount=type=cache,target=/root/.cache/uv \
CUDA_MAJOR="${CUDA_VERSION%%.*}"; \
if [ "$CUDA_MAJOR" -ge 12 ]; then \
uv pip install --system -r requirements/dev.txt; \
uv pip install --system -r requirements/dev.txt \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
fi

# install development dependencies (for testing)
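For context: the Dockerfile now defaults to CUDA 12.9.1 and temporarily compiles xformers from source (see the TODO above) until an xformers release supports torch 2.9.0. A local build picks these defaults up automatically; a minimal sketch, where the image tag is just an example and all other build args keep their defaults:

```bash
# Build the CUDA image with the defaults changed in this PR
docker build \
  --build-arg CUDA_VERSION=12.9.1 \
  --build-arg PYTHON_VERSION=3.12 \
  -f docker/Dockerfile \
  -t vllm-pytorch-2.9.0 .
```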
4 changes: 4 additions & 0 deletions docker/Dockerfile.cpu
@@ -111,9 +111,13 @@ FROM base AS vllm-test-deps

WORKDIR /workspace/vllm

# TODO: Update to 2.9.0 when there is a new build for intel_extension_for_pytorch for that version
RUN --mount=type=bind,src=requirements/test.in,target=requirements/test.in \
cp requirements/test.in requirements/cpu-test.in && \
sed -i '/mamba_ssm/d' requirements/cpu-test.in && \
sed -i 's/^torch==.*/torch==2.8.0/g' requirements/cpu-test.in && \
sed -i 's/torchaudio.*/torchaudio/g' requirements/cpu-test.in && \
sed -i 's/torchvision.*/torchvision/g' requirements/cpu-test.in && \
uv pip compile requirements/cpu-test.in -o requirements/cpu-test.txt --index-strategy unsafe-best-match --torch-backend cpu

RUN --mount=type=cache,target=/root/.cache/uv \
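The CPU image stays on torch 2.8.0 until intel_extension_for_pytorch publishes a build for 2.9.0, so the test requirements are rewritten on the fly in the stage above. The same transformation can be checked outside Docker; a sketch, run from the repository root:

```bash
# Reproduce the cpu-test.in rewrite from docker/Dockerfile.cpu
cp requirements/test.in requirements/cpu-test.in
sed -i '/mamba_ssm/d' requirements/cpu-test.in
sed -i 's/^torch==.*/torch==2.8.0/g' requirements/cpu-test.in
sed -i 's/torchaudio.*/torchaudio/g' requirements/cpu-test.in
sed -i 's/torchvision.*/torchvision/g' requirements/cpu-test.in
uv pip compile requirements/cpu-test.in -o requirements/cpu-test.txt \
  --index-strategy unsafe-best-match --torch-backend cpu
```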
Binary file modified docs/assets/contributing/dockerfile-stages-dependency.png
26 changes: 4 additions & 22 deletions docs/contributing/ci/update_pytorch_version.md
@@ -87,7 +87,7 @@ is ineffective.
While ongoing efforts like <https://github.com/vllm-project/vllm/issues/17419>
address the long build time at its source, the current workaround is to set `VLLM_CI_BRANCH`
to a custom branch provided by @khluu (`VLLM_CI_BRANCH=khluu/use_postmerge_q`)
to a custom branch provided by @khluu (`VLLM_CI_BRANCH=khluu/long_build`)
when manually triggering a build on Buildkite. This branch accomplishes two things:
1. Increase the timeout limit to 10 hours so that the build doesn't time out.
@@ -100,35 +100,17 @@ to warm it up so that future builds are faster.

## Update dependencies

Several vLLM dependencies, such as FlashInfer, also depend on PyTorch and need
Several vLLM dependencies like xFormers depend on PyTorch and need
to be updated accordingly. Rather than waiting for all of them to publish new
releases (which would take too much time), they can be built from
source to unblock the update process.

### FlashInfer

Here is how to build and install it from source with `torch2.7.0+cu128` in vLLM [Dockerfile](https://github.com/vllm-project/vllm/blob/27bebcd89792d5c4b08af7a65095759526f2f9e1/docker/Dockerfile#L259-L271):

```bash
export TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0 10.0+PTX'
export FLASHINFER_ENABLE_SM90=1
uv pip install --system \
--no-build-isolation "git+https://github.com/flashinfer-ai/[email protected]"
```

One caveat is that building FlashInfer from source adds approximately 30
minutes to the vLLM build time. Therefore, it's preferable to cache the wheel in a
public location for immediate installation, such as [this FlashInfer wheel link](https://download.pytorch.org/whl/cu128/flashinfer/flashinfer_python-0.2.6.post1%2Bcu128torch2.7-cp39-abi3-linux_x86_64.whl). For future releases, contact the PyTorch release
team if you want to get the package published there.
### xFormers

Similar to FlashInfer, here is how to build and install xFormers from source:
```bash
export TORCH_CUDA_ARCH_LIST='7.0 7.5 8.0 8.9 9.0 10.0+PTX'
export TORCH_CUDA_ARCH_LIST='7.5 8.0+PTX 9.0a'
MAX_JOBS=16 uv pip install --system \
--no-build-isolation "git+https://github.com/facebookresearch/[email protected].30"
--no-build-isolation "git+https://github.com/facebookresearch/[email protected].32.post2"
```

## Update all the different vLLM platforms
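After the from-source xformers install documented above, it is worth confirming the wheel was actually built against the new torch. A hedged check (the exact output format varies between xformers versions):

```bash
# Confirm the installed versions line up with torch 2.9.0
python -c "import torch, xformers; print(torch.__version__, xformers.__version__)"
# Prints the xformers build configuration, including the torch it was built against
python -m xformers.info
```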
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -6,7 +6,7 @@ requires = [
"packaging>=24.2",
"setuptools>=77.0.3,<80.0.0",
"setuptools-scm>=8.0",
"torch == 2.8.0",
"torch == 2.9.0",
"wheel",
"jinja2",
]
2 changes: 1 addition & 1 deletion requirements/build.txt
@@ -4,7 +4,7 @@ ninja
packaging>=24.2
setuptools>=77.0.3,<80.0.0
setuptools-scm>=8
torch==2.8.0
torch==2.9.0
wheel
jinja2>=3.1.6
regex
10 changes: 5 additions & 5 deletions requirements/cuda.txt
@@ -5,11 +5,11 @@ numba == 0.61.2 # Required for N-gram speculative decoding

# Dependencies for NVIDIA GPUs
ray[cgraph]>=2.48.0 # Ray Compiled Graph, required for pipeline parallelism in V1.
torch==2.8.0
torchaudio==2.8.0
torch==2.9.0
torchaudio==2.9.0
# These must be updated alongside torch
torchvision==0.23.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
torchvision==0.24.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
# https://github.com/facebookresearch/xformers/releases/tag/v0.0.32.post1
xformers==0.0.32.post1; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch >= 2.8
# xformers==0.0.32.post1; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch >= 2.8
# FlashInfer should be updated together with the Dockerfile
flashinfer-python==0.4.1
flashinfer-python==0.4.1
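Note that xformers is commented out here because no release supports torch 2.9.0 yet (it is built from source in the Dockerfile instead), so the CUDA pins only need the cu129 wheel index. A sketch, assuming a Linux x86_64 host:

```bash
# Install the CUDA-specific pins against the cu129 PyTorch wheel index
uv pip install --system -r requirements/cuda.txt \
  --extra-index-url https://download.pytorch.org/whl/cu129
```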
10 changes: 5 additions & 5 deletions requirements/rocm-build.txt
@@ -1,12 +1,12 @@
# Common dependencies
-r common.txt

--extra-index-url https://download.pytorch.org/whl/rocm6.3
torch==2.8.0
torchvision==0.23.0
torchaudio==2.8.0
--extra-index-url https://download.pytorch.org/whl/rocm6.4
torch==2.9.0
torchvision==0.24.0
torchaudio==2.9.0

triton==3.3.0
triton==3.5.0
cmake>=3.26.1,<4
packaging>=24.2
setuptools>=77.0.3,<80.0.0
8 changes: 4 additions & 4 deletions requirements/test.in
@@ -24,9 +24,9 @@ soundfile # required for audio tests
jiwer # required for audio tests
tblib # for pickling test exceptions
timm >=1.0.17 # required for internvl and gemma3n-mm test
torch==2.8.0
torchaudio==2.8.0
torchvision==0.23.0
torch==2.9.0
torchaudio==2.9.0
torchvision==0.24.0
transformers_stream_generator # required for qwen-vl test
matplotlib # required for qwen-vl test
mistral_common[image,audio] >= 1.8.5 # required for voxtral test
@@ -55,4 +55,4 @@ fastsafetensors>=0.1.10
pydantic>=2.12 # 2.11 leads to error on python 3.13
decord==0.6.0
terratorch @ git+https://github.com/IBM/[email protected] # required for PrithviMAE test
gpt-oss >= 0.0.7; python_version > '3.11'
gpt-oss >= 0.0.7; python_version > '3.11'
37 changes: 19 additions & 18 deletions requirements/test.txt
@@ -1,5 +1,5 @@
# This file was autogenerated by uv via the following command:
# uv pip compile requirements/test.in -o requirements/test.txt --index-strategy unsafe-best-match --torch-backend cu128 --python-platform x86_64-manylinux_2_28
# uv pip compile requirements/test.in -o requirements/test.txt --index-strategy unsafe-best-match --python-platform x86_64-manylinux_2_28
absl-py==2.1.0
# via rouge-score
accelerate==1.0.1
@@ -573,42 +573,44 @@ numpy==1.26.4
# tritonclient
# vocos
# xarray
nvidia-cublas-cu12==12.8.4.1
nvidia-cublas-cu12==12.9.1.4
# via
# nvidia-cudnn-cu12
# nvidia-cusolver-cu12
# torch
nvidia-cuda-cupti-cu12==12.8.90
nvidia-cuda-cupti-cu12==12.9.79
# via torch
nvidia-cuda-nvrtc-cu12==12.8.93
nvidia-cuda-nvrtc-cu12==12.9.86
# via torch
nvidia-cuda-runtime-cu12==12.8.90
nvidia-cuda-runtime-cu12==12.9.79
# via torch
nvidia-cudnn-cu12==9.10.2.21
# via torch
nvidia-cufft-cu12==11.3.3.83
nvidia-cufft-cu12==11.4.1.4
# via torch
nvidia-cufile-cu12==1.13.1.3
nvidia-cufile-cu12==1.14.1.1
# via torch
nvidia-curand-cu12==10.3.9.90
nvidia-curand-cu12==10.3.10.19
# via torch
nvidia-cusolver-cu12==11.7.3.90
nvidia-cusolver-cu12==11.7.5.82
# via torch
nvidia-cusparse-cu12==12.5.8.93
nvidia-cusparse-cu12==12.5.10.65
# via
# nvidia-cusolver-cu12
# torch
nvidia-cusparselt-cu12==0.7.1
# via torch
nvidia-nccl-cu12==2.27.3
nvidia-nccl-cu12==2.27.5
# via torch
nvidia-nvjitlink-cu12==12.8.93
nvidia-nvjitlink-cu12==12.9.86
# via
# nvidia-cufft-cu12
# nvidia-cusolver-cu12
# nvidia-cusparse-cu12
# torch
nvidia-nvtx-cu12==12.8.90
nvidia-nvshmem-cu12==3.3.20
# via torch
nvidia-nvtx-cu12==12.9.79
# via torch
omegaconf==2.3.0
# via
@@ -1017,7 +1019,6 @@ setuptools==77.0.3
# lightning-utilities
# pytablewriter
# torch
# triton
shapely==2.1.1
# via
# geopandas
@@ -1122,7 +1123,7 @@ tomli==2.2.1
# via schemathesis
tomli-w==1.2.0
# via schemathesis
torch==2.8.0+cu128
torch==2.9.0+cu129
# via
# -r requirements/test.in
# accelerate
@@ -1151,7 +1152,7 @@ torch==2.8.0+cu128
# torchvision
# vector-quantize-pytorch
# vocos
torchaudio==2.8.0+cu128
torchaudio==2.9.0
# via
# -r requirements/test.in
# encodec
@@ -1164,7 +1165,7 @@ torchmetrics==1.7.4
# pytorch-lightning
# terratorch
# torchgeo
torchvision==0.23.0+cu128
torchvision==0.24.0
# via
# -r requirements/test.in
# lightly
@@ -1205,7 +1206,7 @@ transformers==4.56.2
# transformers-stream-generator
transformers-stream-generator==0.0.5
# via -r requirements/test.in
triton==3.4.0
triton==3.5.0
# via torch
tritonclient==2.51.0
# via