
Commit 21da733 (1 parent: 66072b3)

[Misc] Clean up flags in vllm bench serve (#25138)

Authored by Roger Wang
Signed-off-by: Roger Wang <[email protected]>

4 files changed (+39, -23 lines)

docs/contributing/benchmarks.md (0 additions, 3 deletions)

@@ -156,7 +156,6 @@ vllm serve Qwen/Qwen2-VL-7B-Instruct
 ```bash
 vllm bench serve \
   --backend openai-chat \
-  --endpoint-type openai-chat \
   --model Qwen/Qwen2-VL-7B-Instruct \
   --endpoint /v1/chat/completions \
   --dataset-name hf \
@@ -230,7 +229,6 @@ vllm serve Qwen/Qwen2-VL-7B-Instruct
 ```bash
 vllm bench serve \
   --backend openai-chat \
-  --endpoint-type openai-chat \
   --model Qwen/Qwen2-VL-7B-Instruct \
   --endpoint /v1/chat/completions \
   --dataset-name hf \
@@ -245,7 +243,6 @@ vllm bench serve \
 ```bash
 vllm bench serve \
   --backend openai-chat \
-  --endpoint-type openai-chat \
   --model Qwen/Qwen2-VL-7B-Instruct \
   --endpoint /v1/chat/completions \
   --dataset-name hf \

tests/benchmarks/test_serve_cli.py (1 addition, 1 deletion)

@@ -68,7 +68,7 @@ def test_bench_serve_chat(server):
         "5",
         "--endpoint",
         "/v1/chat/completions",
-        "--endpoint-type",
+        "--backend",
         "openai-chat",
     ]
     result = subprocess.run(command, capture_output=True, text=True)
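
For readers updating their own benchmark scripts, here is a hedged sketch of the same invocation outside pytest, modeled on the test above; the model, host, port, and prompt count are placeholders, not values from this commit:

```python
import subprocess

# Placeholder invocation mirroring the test above; adjust host, port, and
# model to match a running `vllm serve` instance.
command = [
    "vllm", "bench", "serve",
    "--model", "Qwen/Qwen2-VL-7B-Instruct",
    "--host", "127.0.0.1",
    "--port", "8000",
    "--dataset-name", "random",
    "--num-prompts", "5",
    "--endpoint", "/v1/chat/completions",
    "--backend", "openai-chat",  # replaces the deprecated --endpoint-type
]
result = subprocess.run(command, capture_output=True, text=True)
print(result.returncode)
```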

vllm/benchmarks/datasets.py (4 additions, 4 deletions)

@@ -1358,7 +1358,7 @@ def get_samples(args, tokenizer) -> list[SampleRequest]:
     elif args.dataset_name == "sonnet":
         dataset = SonnetDataset(dataset_path=args.dataset_path)
         # For the "sonnet" dataset, formatting depends on the backend.
-        if args.endpoint_type == "openai-chat":
+        if args.backend == "openai-chat":
             input_requests = dataset.sample(
                 num_requests=args.num_prompts,
                 input_len=args.sonnet_input_len,
@@ -1462,15 +1462,15 @@ def get_samples(args, tokenizer) -> list[SampleRequest]:
                 "Please consider contributing if you would "
                 "like to add support for additional dataset formats.")
 
-        if dataset_class.IS_MULTIMODAL and args.endpoint_type not in [
+        if dataset_class.IS_MULTIMODAL and args.backend not in [
                 "openai-chat",
                 "openai-audio",
         ]:
             # multi-modal benchmark is only available on OpenAI Chat
             # endpoint-type.
             raise ValueError(
                 "Multi-modal content is only supported on 'openai-chat' and "
-                "'openai-audio' endpoint-type.")
+                "'openai-audio' backends.")
         input_requests = dataset_class(
             dataset_path=args.dataset_path,
             dataset_subset=args.hf_subset,
@@ -1563,7 +1563,7 @@ def get_samples(args, tokenizer) -> list[SampleRequest]:
 
     try:
         # Enforce endpoint compatibility for multimodal datasets.
-        if args.dataset_name == "random-mm" and args.endpoint_type not in [
+        if args.dataset_name == "random-mm" and args.backend not in [
                 "openai-chat"]:
             raise ValueError(
                 "Multi-modal content (images) is only supported on "

vllm/benchmarks/serve.py (34 additions, 15 deletions)

@@ -8,8 +8,8 @@
 
 On the client side, run:
     vllm bench serve \
-        --endpoint-type <endpoint_type. Default 'openai'> \
-        --label <benchmark result label. Default using endpoint_type> \
+        --backend <backend or endpoint type. Default 'openai'> \
+        --label <benchmark result label. Default using backend> \
         --model <your_model> \
         --dataset-name <dataset_name. Default 'random'> \
         --request-rate <request_rate. Default inf> \
@@ -52,6 +52,21 @@
                    and (shutil.which("gnuplot") is not None))
 
 
+# TODO: Remove this in v0.11.0
+class DeprecatedEndpointTypeAction(argparse.Action):
+    """Argparse action for the deprecated --endpoint-type flag.
+    """
+
+    def __call__(self, _, namespace, values, option_string=None):
+        warnings.warn(
+            "'--endpoint-type' is deprecated and will be removed in v0.11.0. "
+            "Please use '--backend' instead or remove this argument if you "
+            "have already set it.",
+            stacklevel=1,
+        )
+        setattr(namespace, self.dest, values)
+
+
 class TaskType(Enum):
     GENERATION = "generation"
     EMBEDDING = "embedding"
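
The action warns at parse time and still stores the value, so existing command lines keep working through the deprecation window. A self-contained sketch of the pattern; for brevity, the deprecated spelling here writes straight into the `backend` destination, whereas the commit itself keeps a separate `endpoint_type` destination:

```python
import argparse
import warnings


class DeprecatedAliasAction(argparse.Action):
    """Warn when a deprecated spelling is used, then store the value as usual."""

    def __call__(self, parser, namespace, values, option_string=None):
        warnings.warn(
            f"{option_string!r} is deprecated; use '--backend' instead.",
            stacklevel=1,
        )
        setattr(namespace, self.dest, values)


parser = argparse.ArgumentParser(prog="bench-demo")  # prog name is illustrative
parser.add_argument("--backend", type=str, default="openai")
# The deprecated spelling shares the destination, so downstream code only
# ever reads args.backend.
parser.add_argument("--endpoint-type", dest="backend", type=str,
                    action=DeprecatedAliasAction)

args = parser.parse_args(["--endpoint-type", "openai-chat"])
print(args.backend)  # openai-chat, with a warning emitted to stderr
```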
@@ -470,7 +485,7 @@ async def benchmark(
         else:
             request_func = ASYNC_REQUEST_FUNCS[endpoint_type]
     else:
-        raise ValueError(f"Unknown endpoint_type: {endpoint_type}")
+        raise ValueError(f"Unknown backend: {endpoint_type}")
 
     # Reuses connections across requests to reduce TLS handshake overhead.
     connector = aiohttp.TCPConnector(
@@ -850,24 +865,28 @@ def save_to_pytorch_benchmark_format(args: argparse.Namespace,
 
 def add_cli_args(parser: argparse.ArgumentParser):
     add_dataset_parser(parser)
-    parser.add_argument(
-        "--endpoint-type",
-        type=str,
-        default="openai",
-        choices=list(ASYNC_REQUEST_FUNCS.keys()),
-    )
     parser.add_argument(
         "--label",
         type=str,
         default=None,
         help="The label (prefix) of the benchmark results. If not specified, "
-        "the endpoint type will be used as the label.",
+        "the value of '--backend' will be used as the label.",
     )
     parser.add_argument(
         "--backend",
         type=str,
-        default="vllm",
+        default="openai",
+        choices=list(ASYNC_REQUEST_FUNCS.keys()),
+        help="The type of backend or endpoint to use for the benchmark."
+    )
+    parser.add_argument(
+        "--endpoint-type",
+        type=str,
+        default=None,
         choices=list(ASYNC_REQUEST_FUNCS.keys()),
+        action=DeprecatedEndpointTypeAction,
+        help="'--endpoint-type' is deprecated and will be removed in v0.11.0. "
+        "Please use '--backend' instead.",
     )
     parser.add_argument(
         "--base-url",
@@ -1165,7 +1184,6 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]:
         raise ValueError(
             "For exponential ramp-up, the start RPS cannot be 0.")
 
-    endpoint_type = args.endpoint_type
     label = args.label
     model_id = args.model
     model_name = args.served_model_name
@@ -1228,7 +1246,7 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]:
     gc.freeze()
 
     benchmark_result = await benchmark(
-        endpoint_type=args.endpoint_type,
+        endpoint_type=args.backend,
         api_url=api_url,
         base_url=base_url,
         model_id=model_id,
@@ -1262,7 +1280,8 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]:
     # Setup
     current_dt = datetime.now().strftime("%Y%m%d-%H%M%S")
     result_json["date"] = current_dt
-    result_json["endpoint_type"] = args.endpoint_type
+    result_json["endpoint_type"] = args.backend  # for backward compatibility
+    result_json["backend"] = args.backend
     result_json["label"] = label
     result_json["model_id"] = model_id
     result_json["tokenizer_id"] = tokenizer_id
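
Consumers of the saved JSON see both keys during the transition. An illustrative sketch of the resulting metadata shape; every value below is invented:

```python
# Only the key layout comes from the diff above; the values are placeholders.
result_json = {
    "date": "20250918-120000",
    "endpoint_type": "openai-chat",  # legacy key, kept for backward compatibility
    "backend": "openai-chat",        # new canonical key, mirrors --backend
    "label": "openai-chat",          # falls back to --backend when --label is unset
    "model_id": "Qwen/Qwen2-VL-7B-Instruct",
}
assert result_json["endpoint_type"] == result_json["backend"]
```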
@@ -1312,7 +1331,7 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]:
     base_model_id = model_id.split("/")[-1]
     max_concurrency_str = (f"-concurrency{args.max_concurrency}"
                            if args.max_concurrency is not None else "")
-    label = label or endpoint_type
+    label = label or args.backend
     if args.ramp_up_strategy is not None:
         file_name = f"{label}-ramp-up-{args.ramp_up_strategy}-{args.ramp_up_start_rps}qps-{args.ramp_up_end_rps}qps{max_concurrency_str}-{base_model_id}-{current_dt}.json"  # noqa
     else:
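
With --label unset, the label now falls back to --backend, which flows into the result file name. A hedged walk-through of the naming scheme with invented values:

```python
# All values below are invented to illustrate the naming scheme.
label = None or "openai-chat"          # --label unset, falls back to --backend
base_model_id = "Qwen/Qwen2-VL-7B-Instruct".split("/")[-1]
max_concurrency_str = "-concurrency5"  # empty string when --max-concurrency is unset
current_dt = "20250918-120000"
file_name = (f"{label}-ramp-up-linear-1qps-10qps"
             f"{max_concurrency_str}-{base_model_id}-{current_dt}.json")
print(file_name)
# openai-chat-ramp-up-linear-1qps-10qps-concurrency5-Qwen2-VL-7B-Instruct-20250918-120000.json
```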
