Commit 7ca9d2f

fix ci
Signed-off-by: wangli <[email protected]>
1 parent fcbcf88

3 files changed: +12 -13 lines changed

benchmarks/backend_request_func.py

Lines changed: 2 additions & 0 deletions
@@ -157,6 +157,7 @@ def get_model(pretrained_model_name_or_path: str) -> str:
         return model_path
     return pretrained_model_name_or_path
 
+
 def get_tokenizer(
     pretrained_model_name_or_path: str,
     tokenizer_mode: str = "auto",
@@ -188,6 +189,7 @@ def get_tokenizer(
             **kwargs,
         )
 
+
 ASYNC_REQUEST_FUNCS = {
     "vllm": async_request_openai_completions,
 }
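Note: both hunks in this file add nothing but a blank line before a top-level definition. A plausible reading of the CI failure (an assumption; the commit message says only "fix ci") is PEP 8's two-blank-line rule, which flake8 enforces as E302/E305. A minimal sketch of the rule:

import os


def first() -> str:
    # E302 fires when fewer than two blank lines precede a top-level def.
    return os.getcwd()


FUNCS = {"first": first}  # E305 likewise wants two blank lines before this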

benchmarks/benchmark_serving.py

Lines changed: 8 additions & 9 deletions
@@ -284,10 +284,9 @@ def sample_hf_requests(
     random_seed: int,
     fixed_output_len: Optional[int] = None,
 ) -> List[Tuple[str, str, int, Optional[Dict[str, Collection[str]]]]]:
-
     # Special case for vision_arena dataset
     if dataset_path == 'lmarena-ai/vision-arena-bench-v0.1' \
-        and dataset_subset is None:
+            and dataset_subset is None:
         assert dataset_split == "train"
         dataset = load_dataset(dataset_path,
                                name=dataset_subset,
@@ -303,8 +302,8 @@ def sample_hf_requests(
                                streaming=True)
     assert "conversations" in dataset.features, (
         "HF Dataset must have 'conversations' column.")
-    filter_func = lambda x: len(x["conversations"]) >= 2
-    filtered_dataset = dataset.shuffle(seed=random_seed).filter(filter_func)
+    filtered_dataset = dataset.shuffle(seed=random_seed).filter(
+        lambda x: len(x["conversations"]) >= 2, )
     sampled_requests: List[Tuple[str, int, int, Dict[str,
                                                      Collection[str]]]] = []
     for data in filtered_dataset:
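Note: this rewrite drops the named lambda. Assuming the lint failure was flake8's E731 ("do not assign a lambda expression, use a def"), inlining the lambda into .filter(...) silences it without changing behavior. An equivalent fix would be a small def; the fragment below is a hypothetical drop-in for the same spot in sample_hf_requests, not what the commit chose (dataset and random_seed come from the surrounding function):

def has_full_conversation(example: dict) -> bool:
    # Keep only samples that contain at least a prompt/response exchange.
    return len(example["conversations"]) >= 2


filtered_dataset = dataset.shuffle(seed=random_seed).filter(
    has_full_conversation)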
@@ -323,7 +322,7 @@ def sample_hf_requests(
             # Prune too short sequences.
             continue
         if fixed_output_len is None and \
-            (prompt_len > 1024 or prompt_len + output_len > 2048):
+                (prompt_len > 1024 or prompt_len + output_len > 2048):
             # Prune too long sequences.
             continue
 
@@ -342,7 +341,7 @@ def sample_hf_requests(
             }
         elif "image" in data and isinstance(data["image"], str):
             if (data["image"].startswith("http://") or \
-                data["image"].startswith("file://")):
+                    data["image"].startswith("file://")):
                 image_url = data["image"]
             else:
                 image_url = f"file://{data['image']}"
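Note: this hunk and the vision-arena hunk above change whitespace only, giving each backslash continuation one extra indent level. A likely motivation (an assumption, since the diff itself is silent) is pycodestyle's continuation-line checks (the E125/E128/E129 family), which reject a condition that lines up exactly with the block body it guards. A self-contained sketch with hypothetical condition names:

condition_a = condition_b = condition_c = True

# Old layout: the continuation sits flush with the body it guards,
# which the continuation-line checks flag as visually ambiguous.
if condition_a and \
    (condition_b or condition_c):
    pass

# New layout: one extra indent level separates condition from body.
if condition_a and \
        (condition_b or condition_c):
    pass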
@@ -962,8 +961,8 @@ def main(args: argparse.Namespace):
     )
 
     # Traffic
-    result_json["request_rate"] = (args.request_rate if args.request_rate
-                                   < float("inf") else "inf")
+    result_json["request_rate"] = (
+        args.request_rate if args.request_rate < float("inf") else "inf")
     result_json["burstiness"] = args.burstiness
     result_json["max_concurrency"] = args.max_concurrency
 
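Note: besides moving the line break so it no longer falls immediately before the `<` operator (presumably to satisfy the linter's break-around-binary-operator rule), the wrapped expression encodes a real constraint: float("inf") is not representable in strict JSON, so an unbounded request rate is serialized as the string "inf". A standalone illustration:

import json

request_rate = float("inf")  # "send all requests at once" sentinel

# Strict JSON has no infinity: json.dumps(float("inf")) emits the
# non-standard token Infinity and raises under allow_nan=False,
# hence the string fallback.
result_json = {
    "request_rate": request_rate if request_rate < float("inf") else "inf",
}
print(json.dumps(result_json))  # {"request_rate": "inf"}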
@@ -974,7 +973,7 @@ def main(args: argparse.Namespace):
         base_model_id = model_id.split("/")[-1]
         max_concurrency_str = (f"-concurrency{args.max_concurrency}"
                                if args.max_concurrency is not None else "")
-        file_name = f"{backend}-{args.request_rate}qps{max_concurrency_str}-{base_model_id}-{current_dt}.json"  #noqa
+        file_name = f"{backend}-{args.request_rate}qps{max_concurrency_str}-{base_model_id}-{current_dt}.json"  # noqa
         if args.result_filename:
             file_name = args.result_filename
         if args.result_dir:
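Note: the only change here is `#noqa` becoming `# noqa`. flake8 recognizes both spellings of the suppression comment, so this looks like a comment-style cleanup (pycodestyle's E262 expects inline comments to start with `# `) or simple consistency with the rest of the codebase, rather than a behavioral change. For reference, a hypothetical usage example:

# "# noqa" suppresses every flake8 finding on its line; a qualified
# "# noqa: E501" limits the suppression to the long-line check.
url = "https://example.com/a/deliberately/long/path/kept/on/one/line/for/readability"  # noqa: E501
print(url)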

benchmarks/benchmark_throughput.py

Lines changed: 2 additions & 4 deletions
@@ -8,12 +8,10 @@
 from functools import cache
 from typing import Dict, List, Optional, Tuple
 
-import torch
 import uvloop
 from PIL import Image
 from tqdm import tqdm
-from transformers import (AutoModelForCausalLM, AutoTokenizer,
-                          PreTrainedTokenizerBase)
+from transformers import AutoTokenizer, PreTrainedTokenizerBase
 from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
 from vllm.entrypoints.openai.api_server import \
     build_async_engine_client_from_engine_args
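Note: torch and AutoModelForCausalLM drop out of the import block, presumably because nothing in the file references them any more; unused imports fail CI under pyflakes' F401. A minimal reproduction of that failure class (hypothetical file, for illustration only):

import torch  # F401: 'torch' imported but unused, fails the lint gate

from transformers import AutoTokenizer


def show_tokenizer(name: str) -> None:
    tokenizer = AutoTokenizer.from_pretrained(name)  # this import is used
    print(tokenizer)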
@@ -89,7 +87,6 @@ def get_random_lora_request(
 
 def sample_requests(tokenizer: PreTrainedTokenizerBase,
                     args: argparse.Namespace) -> List[SampleRequest]:
-
     dataset_path: str = args.dataset_path
     num_requests: int = args.num_prompts
     fixed_output_len: Optional[int] = args.output_len
@@ -215,6 +212,7 @@ def run_vllm(
     end = time.perf_counter()
     return end - start
 
+
 async def run_vllm_async(
     requests: List[SampleRequest],
     n: int,
