@@ -8,8 +8,8 @@
 
 On the client side, run:
     vllm bench serve \
-        --endpoint-type <endpoint_type. Default 'openai'> \
-        --label <benchmark result label. Default using endpoint_type> \
+        --backend <backend or endpoint type. Default 'openai'> \
+        --label <benchmark result label. Default using backend> \
         --model <your_model> \
         --dataset-name <dataset_name. Default 'random'> \
        --request-rate <request_rate. Default inf> \
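
For reference, a concrete invocation of the updated client command might look like this (the model name, label, and request rate are illustrative, not defaults):

    vllm bench serve \
        --backend openai \
        --label my-benchmark \
        --model meta-llama/Llama-3.1-8B-Instruct \
        --dataset-name random \
        --request-rate 10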

@@ -52,6 +52,21 @@
 and (shutil.which("gnuplot") is not None))
 
 
+# TODO: Remove this in v0.11.0
+class DeprecatedEndpointTypeAction(argparse.Action):
+    """Argparse action for the deprecated --endpoint-type flag.
+    """
+
+    def __call__(self, _, namespace, values, option_string=None):
+        warnings.warn(
+            "'--endpoint-type' is deprecated and will be removed in v0.11.0. "
+            "Please use '--backend' instead or remove this argument if you "
+            "have already set it.",
+            stacklevel=1,
+        )
+        setattr(namespace, self.dest, values)
+
+
 class TaskType(Enum):
     GENERATION = "generation"
     EMBEDDING = "embedding"
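
The deprecation shim above is plain argparse: a custom Action runs only when the flag actually appears on the command line, so users who never pass --endpoint-type see no warning. A minimal standalone sketch of the same pattern (the names here are illustrative, not vLLM's):

    import argparse
    import warnings


    class DeprecatedFlagAction(argparse.Action):
        """Warn when a deprecated option is passed, then store its value."""

        def __call__(self, parser, namespace, values, option_string=None):
            warnings.warn(f"'{option_string}' is deprecated; use '--backend'.",
                          stacklevel=1)
            setattr(namespace, self.dest, values)


    parser = argparse.ArgumentParser()
    parser.add_argument("--backend", type=str, default="openai")
    parser.add_argument("--endpoint-type", type=str, default=None,
                        action=DeprecatedFlagAction)

    args = parser.parse_args(["--endpoint-type", "openai"])  # warning fires here
    print(args.endpoint_type)  # 'openai'; args.backend keeps its default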

@@ -470,7 +485,7 @@ async def benchmark(
         else:
             request_func = ASYNC_REQUEST_FUNCS[endpoint_type]
     else:
-        raise ValueError(f"Unknown endpoint_type: {endpoint_type}")
+        raise ValueError(f"Unknown backend: {endpoint_type}")
 
     # Reuses connections across requests to reduce TLS handshake overhead.
     connector = aiohttp.TCPConnector(
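
The lookup above is a standard dispatch table: request functions are keyed by backend name, and anything outside the table fails fast with the (now renamed) error. A reduced sketch of that shape, with stand-in handlers rather than vLLM's real async request functions:

    from typing import Callable

    # Stand-in handlers; the real table maps names to async request functions.
    ASYNC_REQUEST_FUNCS: dict[str, Callable[[], str]] = {
        "openai": lambda: "POST /v1/completions",
        "openai-chat": lambda: "POST /v1/chat/completions",
    }


    def resolve_request_func(backend: str) -> Callable[[], str]:
        if backend in ASYNC_REQUEST_FUNCS:
            return ASYNC_REQUEST_FUNCS[backend]
        raise ValueError(f"Unknown backend: {backend}")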

@@ -850,24 +865,28 @@ def save_to_pytorch_benchmark_format(args: argparse.Namespace,
 
 def add_cli_args(parser: argparse.ArgumentParser):
     add_dataset_parser(parser)
-    parser.add_argument(
-        "--endpoint-type",
-        type=str,
-        default="openai",
-        choices=list(ASYNC_REQUEST_FUNCS.keys()),
-    )
     parser.add_argument(
         "--label",
         type=str,
         default=None,
         help="The label (prefix) of the benchmark results. If not specified, "
-        "the endpoint type will be used as the label.",
+        "the value of '--backend' will be used as the label.",
     )
     parser.add_argument(
         "--backend",
         type=str,
-        default="vllm",
+        default="openai",
+        choices=list(ASYNC_REQUEST_FUNCS.keys()),
+        help="The type of backend or endpoint to use for the benchmark."
+    )
+    parser.add_argument(
+        "--endpoint-type",
+        type=str,
+        default=None,
         choices=list(ASYNC_REQUEST_FUNCS.keys()),
+        action=DeprecatedEndpointTypeAction,
+        help="'--endpoint-type' is deprecated and will be removed in v0.11.0. "
+        "Please use '--backend' instead.",
     )
     parser.add_argument(
         "--base-url",

@@ -1165,7 +1184,6 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]:
         raise ValueError(
             "For exponential ramp-up, the start RPS cannot be 0.")
 
-    endpoint_type = args.endpoint_type
     label = args.label
     model_id = args.model
     model_name = args.served_model_name

@@ -1228,7 +1246,7 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]:
     gc.freeze()
 
     benchmark_result = await benchmark(
-        endpoint_type=args.endpoint_type,
+        endpoint_type=args.backend,
         api_url=api_url,
         base_url=base_url,
         model_id=model_id,

@@ -1262,7 +1280,8 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]:
         # Setup
         current_dt = datetime.now().strftime("%Y%m%d-%H%M%S")
         result_json["date"] = current_dt
-        result_json["endpoint_type"] = args.endpoint_type
+        result_json["endpoint_type"] = args.backend  # for backward compatibility
+        result_json["backend"] = args.backend
         result_json["label"] = label
         result_json["model_id"] = model_id
         result_json["tokenizer_id"] = tokenizer_id
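
After this hunk, a saved result records the backend under both keys, so downstream tooling that still reads "endpoint_type" keeps working while new tooling can switch to "backend". A sketch of the relevant slice of the payload, with illustrative values:

    result_json = {
        "date": "20250101-120000",
        "endpoint_type": "openai",  # legacy key, kept for backward compatibility
        "backend": "openai",        # new canonical key
        "label": "my-benchmark",
    }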

@@ -1312,7 +1331,7 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]:
         base_model_id = model_id.split("/")[-1]
         max_concurrency_str = (f"-concurrency{args.max_concurrency}"
                                if args.max_concurrency is not None else "")
-        label = label or endpoint_type
+        label = label or args.backend
         if args.ramp_up_strategy is not None:
             file_name = f"{label}-ramp-up-{args.ramp_up_strategy}-{args.ramp_up_start_rps}qps-{args.ramp_up_end_rps}qps{max_concurrency_str}-{base_model_id}-{current_dt}.json"  # noqa
         else:
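
One user-visible effect of the fallback: an unlabeled run now gets a result file prefixed with the backend name. A hypothetical rendering of the ramp-up f-string above, with made-up run parameters:

    args_backend = "openai"        # stand-in for args.backend
    label = None                   # no --label given
    label = label or args_backend  # -> 'openai'

    # Mirrors the ramp-up f-string above with illustrative run parameters:
    file_name = (f"{label}-ramp-up-linear-1qps-10qps-concurrency8-"
                 "Llama-3.1-8B-Instruct-20250101-120000.json")
    print(file_name)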