Skip to content

Commit 4ef102d

Browse files
authored
Merge branch 'vllm-project:main' into ransmith_triton_fav2_vsl
2 parents ecb3320 + 4078052 commit 4ef102d

File tree

168 files changed

+12199
-2418
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

168 files changed

+12199
-2418
lines changed

CMakeLists.txt

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -446,9 +446,6 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
446446
endif()
447447

448448
message(STATUS "Enabling C extension.")
449-
if(VLLM_GPU_LANG STREQUAL "CUDA")
450-
list(APPEND VLLM_C_LIBS cuda)
451-
endif()
452449
define_gpu_extension_target(
453450
_C
454451
DESTINATION vllm
@@ -457,7 +454,6 @@ define_gpu_extension_target(
457454
COMPILE_FLAGS ${VLLM_GPU_FLAGS}
458455
ARCHITECTURES ${VLLM_GPU_ARCHES}
459456
INCLUDE_DIRECTORIES ${CUTLASS_INCLUDE_DIR};${CUTLASS_TOOLS_UTIL_INCLUDE_DIR}
460-
LIBRARIES ${VLLM_C_LIBS}
461457
USE_SABI 3
462458
WITH_SOABI)
463459

Dockerfile.ppc64le

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,12 @@ USER root
44

55
ENV PATH="/usr/local/cargo/bin:$PATH:/opt/conda/bin/"
66

7-
RUN apt-get update -y && apt-get install -y git wget curl vim libnuma-dev libsndfile-dev libprotobuf-dev build-essential ffmpeg libsm6 libxext6 libgl1 libssl-dev
7+
RUN apt-get update -y && apt-get install -y git wget kmod curl vim libnuma-dev libsndfile-dev libprotobuf-dev build-essential ffmpeg libsm6 libxext6 libgl1 libssl-dev
88

99
# Some packages in requirements-cpu are installed here
1010
# IBM provides optimized packages for ppc64le processors in the open-ce project for mamba
1111
# Currently these may not be available for venv or pip directly
12-
RUN micromamba install -y -n base -c https://ftp.osuosl.org/pub/open-ce/1.11.0-p10/ -c defaults python=3.10 torchvision-cpu=0.16.2 rust && micromamba clean --all --yes
12+
RUN micromamba install -y -n base -c https://ftp.osuosl.org/pub/open-ce/1.11.0-p10/ -c defaults python=3.10 rust && micromamba clean --all --yes
1313

1414
COPY ./ /workspace/vllm
1515

@@ -21,7 +21,6 @@ RUN --mount=type=bind,source=.git,target=.git \
2121
RUN --mount=type=cache,target=/root/.cache/pip \
2222
RUSTFLAGS='-L /opt/conda/lib' pip install -v --prefer-binary --extra-index-url https://repo.fury.io/mgiessing \
2323
'cmake>=3.26' ninja packaging 'setuptools-scm>=8' wheel jinja2 \
24-
torch==2.3.1 \
2524
-r requirements-cpu.txt \
2625
xformers uvloop==0.20.0
2726

benchmarks/kernels/benchmark_moe.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -450,7 +450,8 @@ def save_configs(configs: Dict[int, BenchmarkConfig], num_experts: int,
450450
def main(args: argparse.Namespace):
451451
print(args)
452452

453-
config = AutoConfig.from_pretrained(args.model)
453+
config = AutoConfig.from_pretrained(
454+
args.model, trust_remote_code=args.trust_remote_code)
454455
if config.architectures[0] == "DbrxForCausalLM":
455456
E = config.ffn_config.moe_num_experts
456457
topk = config.ffn_config.moe_top_k
@@ -461,6 +462,11 @@ def main(args: argparse.Namespace):
461462
topk = config.num_experts_per_tok
462463
intermediate_size = config.intermediate_size
463464
shard_intermediate_size = 2 * intermediate_size // args.tp_size
465+
elif config.architectures[0] == "DeepseekV3ForCausalLM":
466+
E = config.n_routed_experts
467+
topk = config.num_experts_per_tok
468+
intermediate_size = config.moe_intermediate_size
469+
shard_intermediate_size = 2 * intermediate_size // args.tp_size
464470
else:
465471
# Default: Mixtral.
466472
E = config.num_local_experts
@@ -538,6 +544,7 @@ def _distribute(method: str, inputs: List[Any]) -> List[Any]:
538544
parser.add_argument("--seed", type=int, default=0)
539545
parser.add_argument("--batch-size", type=int, required=False)
540546
parser.add_argument("--tune", action="store_true")
547+
parser.add_argument("--trust-remote-code", action="store_true")
541548
args = parser.parse_args()
542549

543550
main(args)

docs/requirements-docs.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
sphinx==6.2.1
2+
sphinx-argparse==0.4.0
23
sphinx-book-theme==1.0.1
34
sphinx-copybutton==0.5.2
4-
myst-parser==3.0.1
5-
sphinx-argparse==0.4.0
65
sphinx-design==0.6.1
76
sphinx-togglebutton==0.3.2
7+
myst-parser==3.0.1
88
msgspec
99
cloudpickle
1010

docs/source/_static/custom.js

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
// Add RunLLM widget
12
document.addEventListener("DOMContentLoaded", function () {
23
var script = document.createElement("script");
34
script.type = "module";
@@ -15,4 +16,23 @@ document.addEventListener("DOMContentLoaded", function () {
1516

1617
script.async = true;
1718
document.head.appendChild(script);
18-
});
19+
});
20+
21+
// Update URL search params when tab is clicked
22+
document.addEventListener("DOMContentLoaded", function () {
23+
const tabs = document.querySelectorAll(".sd-tab-label");
24+
25+
function updateURL(tab) {
26+
const syncGroup = tab.getAttribute("data-sync-group");
27+
const syncId = tab.getAttribute("data-sync-id");
28+
if (syncGroup && syncId) {
29+
const url = new URL(window.location);
30+
url.searchParams.set(syncGroup, syncId);
31+
window.history.replaceState(null, "", url);
32+
}
33+
}
34+
35+
tabs.forEach(tab => {
36+
tab.addEventListener("click", () => updateURL(tab));
37+
});
38+
});

docs/source/api/engine/index.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@
88
.. currentmodule:: vllm.engine
99
```
1010

11-
```{toctree}
11+
:::{toctree}
1212
:caption: Engines
1313
:maxdepth: 2
1414

1515
llm_engine
1616
async_llm_engine
17-
```
17+
:::

docs/source/api/model/index.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22

33
## Submodules
44

5-
```{toctree}
5+
:::{toctree}
66
:maxdepth: 1
77

88
interfaces_base
99
interfaces
1010
adapters
11-
```
11+
:::

docs/source/api/multimodal/index.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,12 @@ Looking to add your own multi-modal model? Please follow the instructions listed
1717

1818
## Submodules
1919

20-
```{toctree}
20+
:::{toctree}
2121
:maxdepth: 1
2222

2323
inputs
2424
parse
2525
processing
2626
profiling
2727
registry
28-
```
28+
:::
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
# Offline Inference
22

3-
```{toctree}
3+
:::{toctree}
44
:caption: Contents
55
:maxdepth: 1
66

77
llm
88
llm_inputs
9-
```
9+
:::

docs/source/contributing/dockerfile/dockerfile.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,11 @@ The edges of the build graph represent:
1717

1818
- `RUN --mount=(.\*)from=...` dependencies (with a dotted line and an empty diamond arrow head)
1919

20-
> ```{figure} /assets/contributing/dockerfile-stages-dependency.png
20+
> :::{figure} /assets/contributing/dockerfile-stages-dependency.png
2121
> :align: center
2222
> :alt: query
2323
> :width: 100%
24-
> ```
24+
> :::
2525
>
2626
> Made using: <https://github.com/patrickhoefler/dockerfilegraph>
2727
>

0 commit comments

Comments
 (0)