vllm-project
diff --git a/.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py b/.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py
Lines changed: 1 addition & 1 deletion
diff --git a/.buildkite/pyproject.toml b/.buildkite/pyproject.toml
Lines changed: 0 additions & 46 deletions
diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
Lines changed: 13 additions & 2 deletions
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
Lines changed: 1 addition & 0 deletions
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
Lines changed: 2 additions & 14 deletions
diff --git a/benchmarks/benchmark_block_pool.py b/benchmarks/benchmark_block_pool.py
Lines changed: 1 addition & 1 deletion
diff --git a/benchmarks/benchmark_ngram_proposer.py b/benchmarks/benchmark_ngram_proposer.py
Lines changed: 1 addition & 1 deletion
diff --git a/benchmarks/benchmark_serving_structured_output.py b/benchmarks/benchmark_serving_structured_output.py
Lines changed: 2 additions & 3 deletions
@@ -368,7 +368,7 @@ def parse_client_command(cmd: str) -> dict[str, Any]:
         # The GPUs sometimes come in format of "GPUTYPE\nGPUTYPE\n...",
         # we want to turn it into "8xGPUTYPE"
         df["GPU"] = df["GPU"].apply(
-            lambda x: f"{len(x.split('\n'))}x{x.split('\n')[0]}"
+            lambda x: f"{len(x.splitlines())}x{x.splitlines()[0]}"
         )
 
     # get markdown tables
 
@@ -477,6 +477,7 @@ steps:
   source_file_dependencies:
   - csrc/mamba/
   - tests/kernels/mamba
+  - vllm/model_executor/layers/mamba/ops
   commands:
     - pytest -v -s kernels/mamba
 
@@ -834,11 +835,11 @@ steps:
     - pytest -v -s tests/kernels/moe/test_flashinfer.py
     - pytest -v -s tests/compile/test_silu_mul_quant_fusion.py
 
-- label: GPT-OSS Eval (Blackwell)
+- label: Blackwell GPT-OSS Eval
   timeout_in_minutes: 60
   working_dir: "/vllm-workspace/"
   gpu: b200
-  optional: true # disable while debugging
+  optional: true # run on nightlies
   source_file_dependencies:
   - tests/evals/gpt_oss
   - vllm/model_executor/models/gpt_oss.py
@@ -865,6 +866,16 @@ steps:
   commands:
     - pytest -s -v tests/quantization/test_blackwell_moe.py
 
+- label: Blackwell LM Eval Small Models
+  timeout_in_minutes: 75
+  gpu: b200
+  optional: true # run on nightlies
+  source_file_dependencies:
+  - csrc/
+  - vllm/model_executor/layers/quantization
+  commands:
+  - pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=configs/models-blackwell.txt --tp-size=1
+
 #####  1 GPU test  #####
 #####  multi gpus test  #####
 
 
@@ -23,6 +23,7 @@ CMakeLists.txt @tlrmchlsmth @LucasWilkinson
 # Any change to the VllmConfig changes can have a large user-facing impact,
 # so spam a lot of people
 /vllm/config @simon-mo @WoosukKwon @youkaichao @robertgshaw2-redhat @mgoin @tlrmchlsmth @houseroad @hmellor @yewentao256 @ProExpertProg
+/vllm/config/cache.py @simon-mo @WoosukKwon @youkaichao @robertgshaw2-redhat @mgoin @tlrmchlsmth @houseroad @hmellor @yewentao256 @ProExpertProg @heheda12345
 
 # vLLM V1
 /vllm/v1 @WoosukKwon @robertgshaw2-redhat @njhill @ywang96 @comaniac @alexm-redhat
 
@@ -6,28 +6,16 @@ default_stages:
   - manual # Run in CI
 exclude: 'vllm/third_party/.*'
 repos:
-- repo: https://github.com/google/yapf
-  rev: v0.43.0
-  hooks:
-  - id: yapf
-    args: [--in-place, --verbose]
-    # Keep the same list from yapfignore here to avoid yapf failing without any inputs
-    exclude: '(.buildkite|benchmarks|build|examples)/.*'
 - repo: https://github.com/astral-sh/ruff-pre-commit
-  rev: v0.11.7
+  rev: v0.13.3
   hooks:
-  - id: ruff
+  - id: ruff-check
     args: [--output-format, github, --fix]
   - id: ruff-format
-    files: ^(.buildkite|benchmarks|examples)/.*
 - repo: https://github.com/crate-ci/typos
   rev: v1.35.5
   hooks:
   - id: typos
-- repo: https://github.com/PyCQA/isort
-  rev: 6.0.1
-  hooks:
-  - id: isort
 - repo: https://github.com/pre-commit/mirrors-clang-format
   rev: v20.1.3
   hooks:
 
@@ -2,9 +2,9 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import gc
 
+from benchmark_utils import TimeCollector
 from tabulate import tabulate
 
-from benchmark_utils import TimeCollector
 from vllm.utils import FlexibleArgumentParser
 from vllm.v1.core.block_pool import BlockPool
 
 
@@ -5,9 +5,9 @@
 from unittest import mock
 
 import numpy as np
+from benchmark_utils import TimeCollector
 from tabulate import tabulate
 
-from benchmark_utils import TimeCollector
 from vllm.config import (
     CacheConfig,
     DeviceConfig,
 
@@ -37,14 +37,13 @@
 import datasets
 import numpy as np
 import pandas as pd
-from tqdm.asyncio import tqdm
-from transformers import PreTrainedTokenizerBase
-
 from backend_request_func import (
     ASYNC_REQUEST_FUNCS,
     RequestFuncInput,
     RequestFuncOutput,
 )
+from tqdm.asyncio import tqdm
+from transformers import PreTrainedTokenizerBase
 
 try:
     from vllm.transformers_utils.tokenizer import get_tokenizer
Original file line number	Diff line number	Diff line change
`@@ -368,7 +368,7 @@ def parse_client_command(cmd: str) -> dict[str, Any]:`
`368`	`368`	`# The GPUs sometimes come in format of "GPUTYPE\nGPUTYPE\n...",`
`369`	`369`	`# we want to turn it into "8xGPUTYPE"`
`370`	`370`	`df["GPU"] = df["GPU"].apply(`
`371`		`- lambda x: f"{len(x.split('\n'))}x{x.split('\n')[0]}"`
	`371`	`+ lambda x: f"{len(x.splitlines())}x{x.splitlines()[0]}"`
`372`	`372`	`)`
`373`	`373`
`374`	`374`	`# get markdown tables`