Commit 7ca9d2f

fix ci
Signed-off-by: wangli <[email protected]>
1 parent fcbcf88

3 files changed: +12 -13 lines changed

benchmarks/backend_request_func.py

Lines changed: 2 additions & 0 deletions
@@ -157,6 +157,7 @@ def get_model(pretrained_model_name_or_path: str) -> str:
         return model_path
     return pretrained_model_name_or_path
 
+
 def get_tokenizer(
     pretrained_model_name_or_path: str,
     tokenizer_mode: str = "auto",
@@ -188,6 +189,7 @@ def get_tokenizer(
             **kwargs,
         )
 
+
 ASYNC_REQUEST_FUNCS = {
     "vllm": async_request_openai_completions,
 }
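Note: both hunks in this file add nothing but a blank line before a top-level definition. A plausible reading of the CI failure (an assumption; the commit message says only "fix ci") is PEP 8's two-blank-line rule, which flake8 enforces as E302/E305. A minimal sketch of the rule:

import os


def first() -> str:
    # E302 fires when fewer than two blank lines precede a top-level def.
    return os.getcwd()


FUNCS = {"first": first}  # E305 likewise wants two blank lines before this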

benchmarks/benchmark_serving.py

Lines changed: 8 additions & 9 deletions
@@ -284,10 +284,9 @@ def sample_hf_requests(
     random_seed: int,
     fixed_output_len: Optional[int] = None,
 ) -> List[Tuple[str, str, int, Optional[Dict[str, Collection[str]]]]]:
-
     # Special case for vision_arena dataset
     if dataset_path == 'lmarena-ai/vision-arena-bench-v0.1' \
-        and dataset_subset is None:
+            and dataset_subset is None:
         assert dataset_split == "train"
         dataset = load_dataset(dataset_path,
                                name=dataset_subset,
@@ -303,8 +302,8 @@ def sample_hf_requests(
                                streaming=True)
     assert "conversations" in dataset.features, (
         "HF Dataset must have 'conversations' column.")
-    filter_func = lambda x: len(x["conversations"]) >= 2
-    filtered_dataset = dataset.shuffle(seed=random_seed).filter(filter_func)
+    filtered_dataset = dataset.shuffle(seed=random_seed).filter(
+        lambda x: len(x["conversations"]) >= 2, )
     sampled_requests: List[Tuple[str, int, int, Dict[str,
                                                      Collection[str]]]] = []
     for data in filtered_dataset:
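Note: this rewrite drops the named lambda. Assuming the lint failure was flake8's E731 ("do not assign a lambda expression, use a def"), inlining the lambda into .filter(...) silences it without changing behavior. An equivalent fix would be a small def; the fragment below is a hypothetical drop-in for the same spot in sample_hf_requests, not what the commit chose (dataset and random_seed come from the surrounding function):

def has_full_conversation(example: dict) -> bool:
    # Keep only samples that contain at least a prompt/response exchange.
    return len(example["conversations"]) >= 2


filtered_dataset = dataset.shuffle(seed=random_seed).filter(
    has_full_conversation)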
@@ -323,7 +322,7 @@ def sample_hf_requests(
             # Prune too short sequences.
             continue
         if fixed_output_len is None and \
-            (prompt_len > 1024 or prompt_len + output_len > 2048):
+                (prompt_len > 1024 or prompt_len + output_len > 2048):
             # Prune too long sequences.
             continue
 
@@ -342,7 +341,7 @@ def sample_hf_requests(
             }
         elif "image" in data and isinstance(data["image"], str):
             if (data["image"].startswith("http://") or \
-                data["image"].startswith("file://")):
+                    data["image"].startswith("file://")):
                 image_url = data["image"]
             else:
                 image_url = f"file://{data['image']}"
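Note: this hunk and the vision-arena hunk above change whitespace only, giving each backslash continuation one extra indent level. A likely motivation (an assumption, since the diff itself is silent) is pycodestyle's continuation-line checks (the E125/E128/E129 family), which reject a condition that lines up exactly with the block body it guards. A self-contained sketch with hypothetical condition names:

condition_a = condition_b = condition_c = True

# Old layout: the continuation sits flush with the body it guards,
# which the continuation-line checks flag as visually ambiguous.
if condition_a and \
    (condition_b or condition_c):
    pass

# New layout: one extra indent level separates condition from body.
if condition_a and \
        (condition_b or condition_c):
    pass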
@@ -962,8 +961,8 @@ def main(args: argparse.Namespace):
     )
 
     # Traffic
-    result_json["request_rate"] = (args.request_rate if args.request_rate
-                                   < float("inf") else "inf")
+    result_json["request_rate"] = (
+        args.request_rate if args.request_rate < float("inf") else "inf")
     result_json["burstiness"] = args.burstiness
     result_json["max_concurrency"] = args.max_concurrency
 
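Note: besides moving the line break so it no longer falls immediately before the `<` operator (presumably to satisfy the linter's break-around-binary-operator rule), the wrapped expression encodes a real constraint: float("inf") is not representable in strict JSON, so an unbounded request rate is serialized as the string "inf". A standalone illustration:

import json

request_rate = float("inf")  # "send all requests at once" sentinel

# Strict JSON has no infinity: json.dumps(float("inf")) emits the
# non-standard token Infinity and raises under allow_nan=False,
# hence the string fallback.
result_json = {
    "request_rate": request_rate if request_rate < float("inf") else "inf",
}
print(json.dumps(result_json))  # {"request_rate": "inf"}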
@@ -974,7 +973,7 @@ def main(args: argparse.Namespace):
         base_model_id = model_id.split("/")[-1]
         max_concurrency_str = (f"-concurrency{args.max_concurrency}"
                                if args.max_concurrency is not None else "")
-        file_name = f"{backend}-{args.request_rate}qps{max_concurrency_str}-{base_model_id}-{current_dt}.json"  #noqa
+        file_name = f"{backend}-{args.request_rate}qps{max_concurrency_str}-{base_model_id}-{current_dt}.json"  # noqa
         if args.result_filename:
             file_name = args.result_filename
         if args.result_dir:
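Note: the only change here is `#noqa` becoming `# noqa`. flake8 recognizes both spellings of the suppression comment, so this looks like a comment-style cleanup (pycodestyle's E262 expects inline comments to start with `# `) or simple consistency with the rest of the codebase, rather than a behavioral change. For reference, a hypothetical usage example:

# "# noqa" suppresses every flake8 finding on its line; a qualified
# "# noqa: E501" limits the suppression to the long-line check.
url = "https://example.com/a/deliberately/long/path/kept/on/one/line/for/readability"  # noqa: E501
print(url)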

benchmarks/benchmark_throughput.py

Lines changed: 2 additions & 4 deletions
@@ -8,12 +8,10 @@
 from functools import cache
 from typing import Dict, List, Optional, Tuple
 
-import torch
 import uvloop
 from PIL import Image
 from tqdm import tqdm
-from transformers import (AutoModelForCausalLM, AutoTokenizer,
-                          PreTrainedTokenizerBase)
+from transformers import AutoTokenizer, PreTrainedTokenizerBase
 from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
 from vllm.entrypoints.openai.api_server import \
     build_async_engine_client_from_engine_args
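Note: torch and AutoModelForCausalLM drop out of the import block, presumably because nothing in the file references them any more; unused imports fail CI under pyflakes' F401. A minimal reproduction of that failure class (hypothetical file, for illustration only):

import torch  # F401: 'torch' imported but unused, fails the lint gate

from transformers import AutoTokenizer


def show_tokenizer(name: str) -> None:
    tokenizer = AutoTokenizer.from_pretrained(name)  # this import is used
    print(tokenizer)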
@@ -89,7 +87,6 @@ def get_random_lora_request(
 
 def sample_requests(tokenizer: PreTrainedTokenizerBase,
                     args: argparse.Namespace) -> List[SampleRequest]:
-
     dataset_path: str = args.dataset_path
     num_requests: int = args.num_prompts
     fixed_output_len: Optional[int] = args.output_len
@@ -215,6 +212,7 @@ def run_vllm(
     end = time.perf_counter()
     return end - start
 
+
 async def run_vllm_async(
     requests: List[SampleRequest],
     n: int,
