-
-
Notifications
You must be signed in to change notification settings - Fork 11.9k
[Misc] Clean up flags in vllm bench serve
#25138
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
19 commits
Select commit
Hold shift + click to select a range
276deab
add
ywang96 fdd2247
modify
ywang96 8212013
Merge branch 'main' into cleanup-bench
ywang96 de602a9
update
ywang96 9717093
minor change
ywang96 4f71fcc
Merge branch 'main' into cleanup-bench
ywang96 30599a7
update doc
ywang96 3c3858f
Merge branch 'main' into cleanup-bench
ywang96 7b1ba35
Merge branch 'main' into cleanup-bench
ywang96 a70a5b7
update
ywang96 43f0f81
revert
ywang96 3d9856d
update
ywang96 31ee9f2
add
ywang96 2a00750
update test
ywang96 7ce0498
modify
ywang96 ad379a0
update
ywang96 b85f836
quotes
ywang96 81eaf36
Merge branch 'main' into cleanup-bench
ywang96 01ef01f
simplify
ywang96 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,8 +8,8 @@ | |
|
|
||
| On the client side, run: | ||
| vllm bench serve \ | ||
| --endpoint-type <endpoint_type. Default 'openai'> \ | ||
| --label <benchmark result label. Default using endpoint_type> \ | ||
| --backend <backend or endpoint type. Default 'openai'> \ | ||
| --label <benchmark result label. Default using backend> \ | ||
| --model <your_model> \ | ||
| --dataset-name <dataset_name. Default 'random'> \ | ||
| --request-rate <request_rate. Default inf> \ | ||
|
|
@@ -52,6 +52,21 @@ | |
| and (shutil.which("gnuplot") is not None)) | ||
|
|
||
|
|
||
| # TODO: Remove this in v0.11.0 | ||
| class DeprecatedEndpointTypeAction(argparse.Action): | ||
| """Argparse action for the deprecated --endpoint-type flag. | ||
| """ | ||
|
|
||
| def __call__(self, _, namespace, values, option_string=None): | ||
| warnings.warn( | ||
| "'--endpoint-type' is deprecated and will be removed in v0.11.0. " | ||
| "Please use '--backend' instead or remove this argument if you " | ||
| "have already set it.", | ||
| stacklevel=1, | ||
| ) | ||
| setattr(namespace, self.dest, values) | ||
|
|
||
|
|
||
| class TaskType(Enum): | ||
| GENERATION = "generation" | ||
| EMBEDDING = "embedding" | ||
|
|
@@ -470,7 +485,7 @@ async def benchmark( | |
| else: | ||
| request_func = ASYNC_REQUEST_FUNCS[endpoint_type] | ||
| else: | ||
| raise ValueError(f"Unknown endpoint_type: {endpoint_type}") | ||
| raise ValueError(f"Unknown backend: {endpoint_type}") | ||
|
|
||
| # Reuses connections across requests to reduce TLS handshake overhead. | ||
| connector = aiohttp.TCPConnector( | ||
|
|
@@ -850,24 +865,28 @@ def save_to_pytorch_benchmark_format(args: argparse.Namespace, | |
|
|
||
| def add_cli_args(parser: argparse.ArgumentParser): | ||
| add_dataset_parser(parser) | ||
| parser.add_argument( | ||
| "--endpoint-type", | ||
| type=str, | ||
| default="openai", | ||
| choices=list(ASYNC_REQUEST_FUNCS.keys()), | ||
| ) | ||
| parser.add_argument( | ||
| "--label", | ||
| type=str, | ||
| default=None, | ||
| help="The label (prefix) of the benchmark results. If not specified, " | ||
| "the endpoint type will be used as the label.", | ||
| "the value of '--backend' will be used as the label.", | ||
| ) | ||
| parser.add_argument( | ||
| "--backend", | ||
| type=str, | ||
| default="vllm", | ||
| default="openai", | ||
| choices=list(ASYNC_REQUEST_FUNCS.keys()), | ||
| help="The type of backend or endpoint to use for the benchmark." | ||
| ) | ||
| parser.add_argument( | ||
| "--endpoint-type", | ||
| type=str, | ||
| default=None, | ||
| choices=list(ASYNC_REQUEST_FUNCS.keys()), | ||
| action=DeprecatedEndpointTypeAction, | ||
| help="'--endpoint-type' is deprecated and will be removed in v0.11.0. " | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nit: you can throw a warning with a customized action
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. See if you're okay with the current version |
||
| "Please use '--backend' instead.", | ||
| ) | ||
| parser.add_argument( | ||
| "--base-url", | ||
|
|
@@ -1165,7 +1184,6 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]: | |
| raise ValueError( | ||
| "For exponential ramp-up, the start RPS cannot be 0.") | ||
|
|
||
| endpoint_type = args.endpoint_type | ||
| label = args.label | ||
| model_id = args.model | ||
| model_name = args.served_model_name | ||
|
|
@@ -1228,7 +1246,7 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]: | |
| gc.freeze() | ||
|
|
||
| benchmark_result = await benchmark( | ||
| endpoint_type=args.endpoint_type, | ||
| endpoint_type=args.backend, | ||
| api_url=api_url, | ||
| base_url=base_url, | ||
| model_id=model_id, | ||
|
|
@@ -1262,7 +1280,8 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]: | |
| # Setup | ||
| current_dt = datetime.now().strftime("%Y%m%d-%H%M%S") | ||
| result_json["date"] = current_dt | ||
| result_json["endpoint_type"] = args.endpoint_type | ||
| result_json["endpoint_type"] = args.backend # for backward compatibility | ||
| result_json["backend"] = args.backend | ||
| result_json["label"] = label | ||
| result_json["model_id"] = model_id | ||
| result_json["tokenizer_id"] = tokenizer_id | ||
|
|
@@ -1312,7 +1331,7 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]: | |
| base_model_id = model_id.split("/")[-1] | ||
| max_concurrency_str = (f"-concurrency{args.max_concurrency}" | ||
| if args.max_concurrency is not None else "") | ||
| label = label or endpoint_type | ||
| label = label or args.backend | ||
| if args.ramp_up_strategy is not None: | ||
| file_name = f"{label}-ramp-up-{args.ramp_up_strategy}-{args.ramp_up_start_rps}qps-{args.ramp_up_end_rps}qps{max_concurrency_str}-{base_model_id}-{current_dt}.json" # noqa | ||
| else: | ||
|
|
||
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.