vllm-project · DarkLight1337 · Mar 8, 2025 · Jan 29, 2025 · Jan 29, 2025 · Jan 29, 2025
diff --git a/.buildkite/nightly-benchmarks/scripts/run-nightly-benchmarks.sh b/.buildkite/nightly-benchmarks/scripts/run-nightly-benchmarks.sh
@@ -426,7 +426,7 @@ main() {
 
   pip install -U transformers
 
-  pip install -r requirements-dev.txt
+  pip install -r requirements/dev.txt
   which genai-perf
 
   # check storage

diff --git a/.buildkite/run-cpu-test.sh b/.buildkite/run-cpu-test.sh
@@ -35,7 +35,7 @@ function cpu_tests() {
   # Run basic model test
   docker exec cpu-test-"$BUILDKITE_BUILD_NUMBER"-"$NUMA_NODE" bash -c "
     set -e
-    pip install -r vllm/requirements-test.txt
+    pip install -r vllm/requirements/test.txt
     pytest -v -s tests/models/decoder_only/language -m cpu_model
     pytest -v -s tests/models/embedding/language -m cpu_model
     pytest -v -s tests/models/encoder_decoder/language -m cpu_model

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
@@ -35,7 +35,7 @@ steps:
   fast_check: true
   no_gpu: True
   commands:
-  - pip install -r requirements-docs.txt
+  - pip install -r ../../requirements/docs.txt
   - SPHINXOPTS=\"-W\" make html
   # Check API reference (if it fails, you may have missing mock imports)
   - grep \"sig sig-object py\" build/html/api/inference_params.html

diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
@@ -50,7 +50,7 @@ jobs:
   #     matrix:
   #         os: ['ubuntu-20.04']
   #         python-version: ['3.9', '3.10', '3.11', '3.12']
-  #         pytorch-version: ['2.4.0']  # Must be the most recent version that meets requirements-cuda.txt.
+  #         pytorch-version: ['2.4.0']  # Must be the most recent version that meets requirements/cuda.txt.
   #         cuda-version: ['11.8', '12.1']
 
   #   steps:

diff --git a/.github/workflows/scripts/build.sh b/.github/workflows/scripts/build.sh
@@ -9,7 +9,7 @@ PATH=${cuda_home}/bin:$PATH
 LD_LIBRARY_PATH=${cuda_home}/lib64:$LD_LIBRARY_PATH
 
 # Install requirements
-$python_executable -m pip install -r requirements-build.txt -r requirements-cuda.txt
+$python_executable -m pip install -r requirements/build.txt -r requirements/cuda.txt
 
 # Limit the number of parallel jobs to avoid OOM
 export MAX_JOBS=1

@@ -44,8 +44,8 @@ repos:
   rev: 0.6.2
   hooks:
     - id: pip-compile
-      args: [requirements-test.in, -o, requirements-test.txt]
-      files: ^requirements-test\.(in|txt)$
+      args: [requirements/test.in, -o, requirements/test.txt]
+      files: ^requirements/test\.(in|txt)$
 - repo: local
   hooks:
   - id: mypy-local

@@ -18,4 +18,4 @@ formats: []
 # Optionally declare the Python requirements required to build your docs
 python:
   install:
-    - requirements: docs/requirements-docs.txt
+    - requirements: requirements/docs.txt
diff --git a/Dockerfile b/Dockerfile
@@ -57,10 +57,10 @@ RUN --mount=type=cache,target=/root/.cache/uv \
         uv pip install --index-url https://download.pytorch.org/whl/nightly/cu126 "torch==2.7.0.dev20250121+cu126" "torchvision==0.22.0.dev20250121";  \
     fi
 
-COPY requirements-common.txt requirements-common.txt
-COPY requirements-cuda.txt requirements-cuda.txt
+COPY requirements/common.txt requirements/common.txt
+COPY requirements/cuda.txt requirements/cuda.txt
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install -r requirements-cuda.txt
+    uv pip install -r requirements/cuda.txt
 
 # cuda arch list used by torch
 # can be useful for both `dev` and `test`
@@ -78,14 +78,14 @@ FROM base AS build
 ARG TARGETPLATFORM
 
 # install build dependencies
-COPY requirements-build.txt requirements-build.txt
+COPY requirements/build.txt requirements/build.txt
 
 # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
 # Reference: https://github.com/astral-sh/uv/pull/1694
 ENV UV_HTTP_TIMEOUT=500
 
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install -r requirements-build.txt
+    uv pip install -r requirements/build.txt
 
 COPY . .
 ARG GIT_REPO_CHECK=0
@@ -153,11 +153,11 @@ FROM base as dev
 # Reference: https://github.com/astral-sh/uv/pull/1694
 ENV UV_HTTP_TIMEOUT=500
 
-COPY requirements-lint.txt requirements-lint.txt
-COPY requirements-test.txt requirements-test.txt
-COPY requirements-dev.txt requirements-dev.txt
+COPY requirements/lint.txt requirements/lint.txt
+COPY requirements/test.txt requirements/test.txt
+COPY requirements/dev.txt requirements/dev.txt
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install -r requirements-dev.txt
+    uv pip install -r requirements/dev.txt
 #################### DEV IMAGE ####################
 
 #################### vLLM installation IMAGE ####################
@@ -234,9 +234,9 @@ COPY examples examples
 # some issues w.r.t. JIT compilation. Therefore we need to
 # install build dependencies for JIT compilation.
 # TODO: Remove this once FlashInfer AOT wheel is fixed
-COPY requirements-build.txt requirements-build.txt
+COPY requirements/build.txt requirements/build.txt
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install -r requirements-build.txt
+    uv pip install -r requirements/build.txt
 
 #################### vLLM installation IMAGE ####################
 
@@ -253,7 +253,7 @@ ENV UV_HTTP_TIMEOUT=500
 
 # install development dependencies (for testing)
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install -r requirements-dev.txt
+    uv pip install -r requirements/dev.txt
 
 # install development dependencies (for testing)
 RUN --mount=type=cache,target=/root/.cache/uv \

diff --git a/Dockerfile.arm b/Dockerfile.arm
@@ -26,18 +26,18 @@ WORKDIR /workspace
 ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
 ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
 RUN --mount=type=cache,target=/root/.cache/pip \
-    --mount=type=bind,src=requirements-build.txt,target=requirements-build.txt \
+    --mount=type=bind,src=requirements/build.txt,target=requirements/build.txt \
     pip install --upgrade pip && \
-    pip install -r requirements-build.txt
+    pip install -r requirements/build.txt
 
 FROM cpu-test-arm AS build
 
 WORKDIR /workspace/vllm
 
 RUN --mount=type=cache,target=/root/.cache/pip \
-    --mount=type=bind,src=requirements-common.txt,target=requirements-common.txt \
-    --mount=type=bind,src=requirements-cpu.txt,target=requirements-cpu.txt \
-    pip install -v -r requirements-cpu.txt
+    --mount=type=bind,src=requirements/common.txt,target=requirements/common.txt \
+    --mount=type=bind,src=requirements/cpu.txt,target=requirements/cpu.txt \
+    pip install -v -r requirements/cpu.txt
 
 COPY . .
 ARG GIT_REPO_CHECK=0

diff --git a/Dockerfile.cpu b/Dockerfile.cpu
@@ -29,18 +29,18 @@ WORKDIR /workspace
 ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
 ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
 RUN --mount=type=cache,target=/root/.cache/pip \
-    --mount=type=bind,src=requirements-build.txt,target=requirements-build.txt \
+    --mount=type=bind,src=requirements/build.txt,target=requirements/build.txt \
     pip install --upgrade pip && \
-    pip install -r requirements-build.txt
+    pip install -r requirements/build.txt
 
 FROM cpu-test-1 AS build
 
 WORKDIR /workspace/vllm
 
 RUN --mount=type=cache,target=/root/.cache/pip \
-    --mount=type=bind,src=requirements-common.txt,target=requirements-common.txt \
-    --mount=type=bind,src=requirements-cpu.txt,target=requirements-cpu.txt \
-    pip install -v -r requirements-cpu.txt
+    --mount=type=bind,src=requirements/common.txt,target=requirements/common.txt \
+    --mount=type=bind,src=requirements/cpu.txt,target=requirements/cpu.txt \
+    pip install -v -r requirements/cpu.txt
 
 COPY . .
 ARG GIT_REPO_CHECK=0

diff --git a/Dockerfile.hpu b/Dockerfile.hpu
@@ -4,7 +4,7 @@ COPY ./ /workspace/vllm
 
 WORKDIR /workspace/vllm
 
-RUN pip install -v -r requirements-hpu.txt
+RUN pip install -v -r requirements/hpu.txt
 
 ENV no_proxy=localhost,127.0.0.1
 ENV PT_HPU_ENABLE_LAZY_COLLECTIVES=true

diff --git a/Dockerfile.neuron b/Dockerfile.neuron
@@ -36,7 +36,7 @@ RUN --mount=type=bind,source=.git,target=.git \
 
 RUN python3 -m pip install -U \
         'cmake>=3.26' ninja packaging 'setuptools-scm>=8' wheel jinja2 \
-        -r requirements-neuron.txt
+        -r requirements/neuron.txt
 
 ENV VLLM_TARGET_DEVICE neuron
 RUN --mount=type=bind,source=.git,target=.git \

diff --git a/Dockerfile.openvino b/Dockerfile.openvino
@@ -16,7 +16,7 @@ RUN --mount=type=bind,source=.git,target=.git \
 
 RUN python3 -m pip install -U pip
 # install build requirements
-RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" python3 -m pip install -r /workspace/requirements-build.txt
+RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" python3 -m pip install -r /workspace/requirements/build.txt
 # build vLLM with OpenVINO backend
 RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" VLLM_TARGET_DEVICE="openvino" python3 -m pip install /workspace
 

diff --git a/Dockerfile.ppc64le b/Dockerfile.ppc64le
@@ -6,7 +6,7 @@ ENV PATH="/usr/local/cargo/bin:$PATH:/opt/conda/bin/"
 
 RUN apt-get update -y && apt-get install -y git wget kmod curl vim libnuma-dev libsndfile-dev libprotobuf-dev build-essential ffmpeg libsm6 libxext6 libgl1 libssl-dev 
 
-# Some packages in requirements-cpu are installed here
+# Some packages in requirements/cpu are installed here
 # IBM provides optimized packages for ppc64le processors in the open-ce project for mamba
 # Currently these may not be available for venv or pip directly
 RUN micromamba install -y -n base -c https://ftp.osuosl.org/pub/open-ce/1.11.0-p10/ -c defaults python=3.10 rust && micromamba clean --all --yes
@@ -21,7 +21,7 @@ RUN --mount=type=bind,source=.git,target=.git \
 RUN --mount=type=cache,target=/root/.cache/pip  \
     RUSTFLAGS='-L /opt/conda/lib' pip install -v --prefer-binary --extra-index-url https://repo.fury.io/mgiessing \
         'cmake>=3.26' ninja packaging 'setuptools-scm>=8' wheel jinja2 \
-        -r requirements-cpu.txt \
+        -r requirements/cpu.txt \
         xformers uvloop==0.20.0
 
 RUN --mount=type=bind,source=.git,target=.git \

diff --git a/Dockerfile.rocm b/Dockerfile.rocm
@@ -38,7 +38,7 @@ FROM fetch_vllm AS build_vllm
 ARG USE_CYTHON
 # Build vLLM
 RUN cd vllm \
-    && python3 -m pip install -r requirements-rocm.txt \
+    && python3 -m pip install -r requirements/rocm.txt \
     && python3 setup.py clean --all  \
     && if [ ${USE_CYTHON} -eq "1" ]; then python3 setup_cython.py build_ext --inplace; fi \
     && python3 setup.py bdist_wheel --dist-dir=dist
@@ -60,7 +60,7 @@ RUN python3 -m pip install --upgrade pip && rm -rf /var/lib/apt/lists/*
 # Install vLLM
 RUN --mount=type=bind,from=export_vllm,src=/,target=/install \
     cd /install \
-    && pip install -U -r requirements-rocm.txt \
+    && pip install -U -r requirements/rocm.txt \
     && pip uninstall -y vllm \
     && pip install *.whl
 
@@ -99,7 +99,7 @@ RUN if [ ${BUILD_RPD} -eq "1" ]; then \
 # Install vLLM
 RUN --mount=type=bind,from=export_vllm,src=/,target=/install \
     cd /install \
-    && pip install -U -r requirements-rocm.txt \
+    && pip install -U -r requirements/rocm.txt \
     && pip uninstall -y vllm \
     && pip install *.whl
 

diff --git a/Dockerfile.s390x b/Dockerfile.s390x
@@ -58,7 +58,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     cd ../../python && \
     export PYARROW_PARALLEL=4 && \
     export ARROW_BUILD_TYPE=release && \
-    uv pip install -r requirements-build.txt && \
+    uv pip install -r requirements/build.txt && \
     python setup.py build_ext --build-type=$ARROW_BUILD_TYPE --bundle-arrow-cpp bdist_wheel
 
 FROM python-install AS numa-build
@@ -120,16 +120,16 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     --mount=type=bind,from=rust,source=/root/.rustup,target=/root/.rustup,rw \
     --mount=type=bind,from=pyarrow,source=/tmp/arrow/python/dist,target=/tmp/arrow-wheels \
     --mount=type=bind,from=torch-vision,source=/tmp/vision/dist,target=/tmp/vision-wheels/ \
-     sed -i '/^torch/d' requirements-build.txt && \
+     sed -i '/^torch/d' requirements/build.txt && \
      ARROW_WHL_FILE=$(ls /tmp/arrow-wheels/pyarrow-*.whl | head -n 1) && \
      VISION_WHL_FILE=$(ls /tmp/vision-wheels/*.whl | head -n 1) && \
     uv pip install -v \    
         $ARROW_WHL_FILE  \
         $VISION_WHL_FILE \
         --extra-index-url https://download.pytorch.org/whl/nightly/cpu \
         --index-strategy unsafe-best-match \
-        -r requirements-build.txt \
-        -r requirements-cpu.txt 
+        -r requirements/build.txt \
+        -r requirements/cpu.txt 
 
 # Build and install vllm
 RUN --mount=type=cache,target=/root/.cache/uv \

diff --git a/Dockerfile.tpu b/Dockerfile.tpu
@@ -19,7 +19,7 @@ ENV VLLM_TARGET_DEVICE="tpu"
 RUN --mount=type=cache,target=/root/.cache/pip \
     --mount=type=bind,source=.git,target=.git \
     python3 -m pip install \
-        -r requirements-tpu.txt
+        -r requirements/tpu.txt
 RUN python3 setup.py develop
 
 # install development dependencies (for testing)

diff --git a/Dockerfile.xpu b/Dockerfile.xpu
@@ -25,12 +25,12 @@ RUN apt-get update -y && \
     wget
 
 WORKDIR /workspace/vllm
-COPY requirements-xpu.txt /workspace/vllm/requirements-xpu.txt
-COPY requirements-common.txt /workspace/vllm/requirements-common.txt
+COPY requirements/xpu.txt /workspace/vllm/requirements/xpu.txt
+COPY requirements/common.txt /workspace/vllm/requirements/common.txt
 
 RUN --mount=type=cache,target=/root/.cache/pip \
     pip install --no-cache-dir \
-    -r requirements-xpu.txt
+    -r requirements/xpu.txt
 
 RUN git clone https://github.com/intel/pti-gpu && \
     cd pti-gpu/sdk && \

diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1,9 +1,9 @@
 include LICENSE
-include requirements-common.txt
-include requirements-cuda.txt
-include requirements-rocm.txt
-include requirements-neuron.txt
-include requirements-cpu.txt
+include requirements/common.txt
+include requirements/cuda.txt
+include requirements/rocm.txt
+include requirements/neuron.txt
+include requirements/cpu.txt
 include CMakeLists.txt
 
 recursive-include cmake *

diff --git a/docs/README.md b/docs/README.md
@@ -4,7 +4,7 @@
 
 ```bash
 # Install dependencies.
-pip install -r requirements-docs.txt
+pip install -r ../requirements/docs.txt
 
 # Build the docs.
 make clean

diff --git a/docs/source/contributing/overview.md b/docs/source/contributing/overview.md
@@ -23,7 +23,7 @@ Check out the [building from source](#build-from-source) documentation for detai
 ## Testing
 
 ```bash
-pip install -r requirements-dev.txt
+pip install -r requirements/dev.txt
 
 # Linting, formatting and static type checking
 pre-commit install --hook-type pre-commit --hook-type commit-msg

diff --git a/docs/source/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md b/docs/source/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md
@@ -63,7 +63,7 @@ To build and install vLLM from source, run:
 ```console
 git clone https://github.com/vllm-project/vllm.git
 cd vllm
-pip install -r requirements-hpu.txt
+pip install -r requirements/hpu.txt
 python setup.py develop
 ```
 
@@ -73,7 +73,7 @@ Currently, the latest features and performance optimizations are developed in Ga
 git clone https://github.com/HabanaAI/vllm-fork.git
 cd vllm-fork
 git checkout habana_main
-pip install -r requirements-hpu.txt
+pip install -r requirements/hpu.txt
 python setup.py develop
 ```
 

diff --git a/docs/source/getting_started/installation/ai_accelerator/neuron.inc.md b/docs/source/getting_started/installation/ai_accelerator/neuron.inc.md
@@ -116,7 +116,7 @@ Once neuronx-cc and transformers-neuronx packages are installed, we will be able
 ```console
 git clone https://github.com/vllm-project/vllm.git
 cd vllm
-pip install -U -r requirements-neuron.txt
+pip install -U -r requirements/neuron.txt
 VLLM_TARGET_DEVICE="neuron" pip install .
 ```
 

diff --git a/docs/source/getting_started/installation/ai_accelerator/openvino.inc.md b/docs/source/getting_started/installation/ai_accelerator/openvino.inc.md
@@ -32,7 +32,7 @@ Second, clone vLLM and install prerequisites for the vLLM OpenVINO backend insta
 ```console
 git clone https://github.com/vllm-project/vllm.git
 cd vllm
-pip install -r requirements-build.txt --extra-index-url https://download.pytorch.org/whl/cpu
+pip install -r requirements/build.txt --extra-index-url https://download.pytorch.org/whl/cpu
 ```
 
 Finally, install vLLM with OpenVINO backend:

diff --git a/docs/source/getting_started/installation/ai_accelerator/tpu.inc.md b/docs/source/getting_started/installation/ai_accelerator/tpu.inc.md
@@ -151,7 +151,7 @@ pip uninstall torch torch-xla -y
 Install build dependencies:
 
 ```bash
-pip install -r requirements-tpu.txt
+pip install -r requirements/tpu.txt
 sudo apt-get install libopenblas-base libopenmpi-dev libomp-dev
 ```
 

diff --git a/docs/source/getting_started/installation/cpu/apple.inc.md b/docs/source/getting_started/installation/cpu/apple.inc.md
@@ -25,7 +25,7 @@ After installation of XCode and the Command Line Tools, which include Apple Clan
 ```console
 git clone https://github.com/vllm-project/vllm.git
 cd vllm
-pip install -r requirements-cpu.txt
+pip install -r requirements/cpu.txt
 pip install -e . 
 ```
 

diff --git a/docs/source/getting_started/installation/cpu/build.inc.md b/docs/source/getting_started/installation/cpu/build.inc.md
@@ -18,7 +18,7 @@ Third, install Python packages for vLLM CPU backend building:
 ```console
 pip install --upgrade pip
 pip install "cmake>=3.26" wheel packaging ninja "setuptools-scm>=8" numpy
-pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu
+pip install -v -r requirements/cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu
 ```
 
 Finally, build and install vLLM CPU backend: