
Commit 4d574bb

hmellor authored and Isotr0py committed
[Doc] Move examples into categories (vllm-project#11840)
Signed-off-by: Harry Mellor <[email protected]>
Signed-off-by: Isotr0py <[email protected]>
1 parent a6f249c commit 4d574bb
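The diffs below are mechanical path updates: example scripts move under category subdirectories (offline_inference/, online_serving/, other/), and every hard-coded reference in CI scripts, workflows, and the Dockerfile is updated to match. As a rough illustration only, a migration of this kind could be scripted as follows; the category names come from the diff, while the file list, directories, and sed invocation are assumptions, not the tooling actually used for this commit.

# Hypothetical sketch of this kind of re-categorization; not the commit's
# actual tooling. Category names appear in the diffs below.
cd examples
mkdir -p offline_inference online_serving other
git mv offline_inference.py offline_inference_tpu.py cpu_offload.py offline_inference/
git mv chart-helm sagemaker-entrypoint.sh online_serving/
git mv tensorize_vllm_model.py other/
cd ..
# Rewrite hard-coded references in CI scripts, workflows, and the Dockerfile
# (GNU sed shown; the pattern here covers only one of the moved files).
grep -rl 'examples/offline_inference\.py' .buildkite .github Dockerfile \
  | xargs sed -i 's|examples/offline_inference\.py|examples/offline_inference/offline_inference.py|g'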

116 files changed: +153 additions, -124 deletions


.buildkite/run-cpu-test.sh

Lines changed: 1 addition & 1 deletion

@@ -30,7 +30,7 @@ function cpu_tests() {
   # offline inference
   docker exec cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2-"$NUMA_NODE" bash -c "
     set -e
-    python3 examples/offline_inference.py"
+    python3 examples/offline_inference/offline_inference.py"
 
   # Run basic model test
   docker exec cpu-test-"$BUILDKITE_BUILD_NUMBER"-"$NUMA_NODE" bash -c "

.buildkite/run-gh200-test.sh

Lines changed: 1 addition & 1 deletion

@@ -24,5 +24,5 @@ remove_docker_container
 
 # Run the image and test offline inference
 docker run --name gh200-test --gpus=all --entrypoint="" gh200-test bash -c '
-    python3 examples/offline_inference.py
+    python3 examples/offline_inference/offline_inference.py
 '

.buildkite/run-hpu-test.sh

Lines changed: 1 addition & 1 deletion

@@ -13,4 +13,4 @@ trap remove_docker_container EXIT
 remove_docker_container
 
 # Run the image and launch offline inference
-docker run --runtime=habana --name=hpu-test --network=host -e HABANA_VISIBLE_DEVICES=all -e VLLM_SKIP_WARMUP=true --entrypoint="" hpu-test-env python3 examples/offline_inference.py
+docker run --runtime=habana --name=hpu-test --network=host -e HABANA_VISIBLE_DEVICES=all -e VLLM_SKIP_WARMUP=true --entrypoint="" hpu-test-env python3 examples/offline_inference/offline_inference.py

.buildkite/run-neuron-test.sh

Lines changed: 1 addition & 1 deletion

@@ -51,4 +51,4 @@ docker run --rm -it --device=/dev/neuron0 --device=/dev/neuron1 --network host \
     -e "NEURON_COMPILE_CACHE_URL=${NEURON_COMPILE_CACHE_MOUNT}" \
     --name "${container_name}" \
     ${image_name} \
-    /bin/bash -c "python3 /workspace/vllm/examples/offline_inference_neuron.py"
+    /bin/bash -c "python3 /workspace/vllm/examples/offline_inference/offline_inference_neuron.py"

.buildkite/run-openvino-test.sh

Lines changed: 1 addition & 1 deletion

@@ -13,4 +13,4 @@ trap remove_docker_container EXIT
 remove_docker_container
 
 # Run the image and launch offline inference
-docker run --network host --env VLLM_OPENVINO_KVCACHE_SPACE=1 --name openvino-test openvino-test python3 /workspace/examples/offline_inference.py
+docker run --network host --env VLLM_OPENVINO_KVCACHE_SPACE=1 --name openvino-test openvino-test python3 /workspace/examples/offline_inference/offline_inference.py

.buildkite/run-tpu-test.sh

Lines changed: 1 addition & 1 deletion

@@ -14,4 +14,4 @@ remove_docker_container
 # For HF_TOKEN.
 source /etc/environment
 # Run a simple end-to-end example.
-docker run --privileged --net host --shm-size=16G -it -e "HF_TOKEN=$HF_TOKEN" --name tpu-test vllm-tpu /bin/bash -c "python3 -m pip install git+https://github.com/thuml/depyf.git && python3 -m pip install pytest && python3 -m pip install lm_eval[api]==0.4.4 && pytest -v -s /workspace/vllm/tests/entrypoints/openai/test_accuracy.py && pytest -v -s /workspace/vllm/tests/tpu/test_custom_dispatcher.py && python3 /workspace/vllm/tests/tpu/test_compilation.py && python3 /workspace/vllm/examples/offline_inference_tpu.py"
+docker run --privileged --net host --shm-size=16G -it -e "HF_TOKEN=$HF_TOKEN" --name tpu-test vllm-tpu /bin/bash -c "python3 -m pip install git+https://github.com/thuml/depyf.git && python3 -m pip install pytest && python3 -m pip install lm_eval[api]==0.4.4 && pytest -v -s /workspace/vllm/tests/entrypoints/openai/test_accuracy.py && pytest -v -s /workspace/vllm/tests/tpu/test_custom_dispatcher.py && python3 /workspace/vllm/tests/tpu/test_compilation.py && python3 /workspace/vllm/examples/offline_inference/offline_inference_tpu.py"

.buildkite/run-xpu-test.sh

Lines changed: 2 additions & 2 deletions

@@ -14,6 +14,6 @@ remove_docker_container
 
 # Run the image and test offline inference/tensor parallel
 docker run --name xpu-test --device /dev/dri -v /dev/dri/by-path:/dev/dri/by-path --entrypoint="" xpu-test sh -c '
-    python3 examples/offline_inference.py
-    python3 examples/offline_inference_cli.py -tp 2
+    python3 examples/offline_inference/offline_inference.py
+    python3 examples/offline_inference/offline_inference_cli.py -tp 2
 '

.buildkite/test-pipeline.yaml

Lines changed: 13 additions & 13 deletions

@@ -187,19 +187,19 @@ steps:
   - examples/
   commands:
   - pip install tensorizer # for tensorizer test
-  - python3 offline_inference.py
-  - python3 cpu_offload.py
-  - python3 offline_inference_chat.py
-  - python3 offline_inference_with_prefix.py
-  - python3 llm_engine_example.py
-  - python3 offline_inference_vision_language.py
-  - python3 offline_inference_vision_language_multi_image.py
-  - python3 tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
-  - python3 offline_inference_encoder_decoder.py
-  - python3 offline_inference_classification.py
-  - python3 offline_inference_embedding.py
-  - python3 offline_inference_scoring.py
-  - python3 offline_profile.py --model facebook/opt-125m run_num_steps --num-steps 2
+  - python3 offline_inference/offline_inference.py
+  - python3 offline_inference/cpu_offload.py
+  - python3 offline_inference/offline_inference_chat.py
+  - python3 offline_inference/offline_inference_with_prefix.py
+  - python3 offline_inference/llm_engine_example.py
+  - python3 offline_inference/offline_inference_vision_language.py
+  - python3 offline_inference/offline_inference_vision_language_multi_image.py
+  - python3 other/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 other/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
+  - python3 offline_inference/offline_inference_encoder_decoder.py
+  - python3 offline_inference/offline_inference_classification.py
+  - python3 offline_inference/offline_inference_embedding.py
+  - python3 offline_inference/offline_inference_scoring.py
+  - python3 offline_inference/offline_profile.py --model facebook/opt-125m run_num_steps --num-steps 2
 
 - label: Prefix Caching Test # 9min
   mirror_hardwares: [amd]
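Because this step lists every relocated script by hand, a path check is a cheap way to catch any reference the rename missed. A rough sketch, run from the repository root; the grep pattern is an assumption about how the paths appear in the YAML.

# Illustrative check, not part of the commit: verify every example script
# the pipeline invokes exists under the new categorized layout.
grep -oE "(offline_inference|other)/[A-Za-z0-9_]+\.py" .buildkite/test-pipeline.yaml \
  | sort -u \
  | while read -r f; do
      [ -f "examples/$f" ] || echo "missing: examples/$f"
    done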

.github/workflows/lint-and-deploy.yaml

Lines changed: 2 additions & 2 deletions

@@ -27,7 +27,7 @@ jobs:
           version: v3.10.1
 
       - name: Run chart-testing (lint)
-        run: ct lint --target-branch ${{ github.event.repository.default_branch }} --chart-dirs examples/chart-helm --charts examples/chart-helm
+        run: ct lint --target-branch ${{ github.event.repository.default_branch }} --chart-dirs examples/online_serving/chart-helm --charts examples/online_serving/chart-helm
 
       - name: Setup minio
         run: |
@@ -64,7 +64,7 @@ jobs:
         run: |
           export AWS_ACCESS_KEY_ID=minioadmin
           export AWS_SECRET_ACCESS_KEY=minioadmin
-          helm install --wait --wait-for-jobs --timeout 5m0s --debug --create-namespace --namespace=ns-vllm test-vllm examples/chart-helm -f examples/chart-helm/values.yaml --set secrets.s3endpoint=http://minio:9000 --set secrets.s3bucketname=testbucket --set secrets.s3accesskeyid=$AWS_ACCESS_KEY_ID --set secrets.s3accesskey=$AWS_SECRET_ACCESS_KEY --set resources.requests.cpu=1 --set resources.requests.memory=4Gi --set resources.limits.cpu=2 --set resources.limits.memory=5Gi --set image.env[0].name=VLLM_CPU_KVCACHE_SPACE --set image.env[1].name=VLLM_LOGGING_LEVEL --set-string image.env[0].value="1" --set-string image.env[1].value="DEBUG" --set-string extraInit.s3modelpath="opt-125m/" --set-string 'resources.limits.nvidia\.com/gpu=0' --set-string 'resources.requests.nvidia\.com/gpu=0' --set-string image.repository="vllm-cpu-env"
+          helm install --wait --wait-for-jobs --timeout 5m0s --debug --create-namespace --namespace=ns-vllm test-vllm examples/online_serving/chart-helm -f examples/online_serving/chart-helm/values.yaml --set secrets.s3endpoint=http://minio:9000 --set secrets.s3bucketname=testbucket --set secrets.s3accesskeyid=$AWS_ACCESS_KEY_ID --set secrets.s3accesskey=$AWS_SECRET_ACCESS_KEY --set resources.requests.cpu=1 --set resources.requests.memory=4Gi --set resources.limits.cpu=2 --set resources.limits.memory=5Gi --set image.env[0].name=VLLM_CPU_KVCACHE_SPACE --set image.env[1].name=VLLM_LOGGING_LEVEL --set-string image.env[0].value="1" --set-string image.env[1].value="DEBUG" --set-string extraInit.s3modelpath="opt-125m/" --set-string 'resources.limits.nvidia\.com/gpu=0' --set-string 'resources.requests.nvidia\.com/gpu=0' --set-string image.repository="vllm-cpu-env"
 
       - name: curl test
         run: |
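Since the chart path changed in two separate workflow steps, rendering the chart locally is one way to sanity-check the new location before CI runs. A minimal sketch, assuming a repository-root working directory; the release name is illustrative.

# Hypothetical local smoke test for the relocated chart.
helm template test-release examples/online_serving/chart-helm \
  -f examples/online_serving/chart-helm/values.yaml > /dev/null && echo "chart renders OK"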

Dockerfile

Lines changed: 1 addition & 1 deletion

@@ -250,7 +250,7 @@ ENV VLLM_USAGE_SOURCE production-docker-image
 # define sagemaker first, so it is not default from `docker build`
 FROM vllm-openai-base AS vllm-sagemaker
 
-COPY examples/sagemaker-entrypoint.sh .
+COPY examples/online_serving/sagemaker-entrypoint.sh .
 RUN chmod +x sagemaker-entrypoint.sh
 ENTRYPOINT ["./sagemaker-entrypoint.sh"]
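To confirm the relocated entrypoint still lands in the sagemaker image, one could build just that stage and list the copied script. Illustrative only: the stage and script names come from the diff, while the image tag is made up.

# Hypothetical smoke test of the vllm-sagemaker stage.
docker build --target vllm-sagemaker -t vllm-sagemaker:smoke .
docker run --rm --entrypoint ls vllm-sagemaker:smoke -l sagemaker-entrypoint.sh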