@@ -525,6 +525,7 @@ async def benchmark(
525525 api_url : str ,
526526 base_url : str ,
527527 model_id : str ,
528+ model_name : str ,
528529 tokenizer : PreTrainedTokenizerBase ,
529530 input_requests : List [Tuple [str , int , int ]],
530531 logprobs : Optional [int ],
@@ -553,6 +554,7 @@ async def benchmark(
553554 "Multi-modal content is only supported on 'openai-chat' backend." )
554555 test_input = RequestFuncInput (
555556 model = model_id ,
557+ model_name = model_name ,
556558 prompt = test_prompt ,
557559 api_url = api_url ,
558560 prompt_len = test_prompt_len ,
@@ -573,6 +575,7 @@ async def benchmark(
573575 if profile :
574576 print ("Starting profiler..." )
575577 profile_input = RequestFuncInput (model = model_id ,
578+ model_name = model_name ,
576579 prompt = test_prompt ,
577580 api_url = base_url + "/start_profile" ,
578581 prompt_len = test_prompt_len ,
@@ -616,6 +619,7 @@ async def limited_request_func(request_func_input, pbar):
616619 async for request in get_request (input_requests , request_rate , burstiness ):
617620 prompt , prompt_len , output_len , mm_content = request
618621 request_func_input = RequestFuncInput (model = model_id ,
622+ model_name = model_name ,
619623 prompt = prompt ,
620624 api_url = api_url ,
621625 prompt_len = prompt_len ,
@@ -780,6 +784,7 @@ def main(args: argparse.Namespace):
780784
781785 backend = args .backend
782786 model_id = args .model
787+ model_name = args .served_model_name
783788 tokenizer_id = args .tokenizer if args .tokenizer is not None else args .model
784789 tokenizer_mode = args .tokenizer_mode
785790
@@ -877,6 +882,7 @@ def main(args: argparse.Namespace):
877882 api_url = api_url ,
878883 base_url = base_url ,
879884 model_id = model_id ,
885+ model_name = model_name ,
880886 tokenizer = tokenizer ,
881887 input_requests = input_requests ,
882888 logprobs = args .logprobs ,
@@ -1222,5 +1228,12 @@ def main(args: argparse.Namespace):
12221228 'always use the slow tokenizer. \n * '
12231229 '"mistral" will always use the `mistral_common` tokenizer.' )
12241230
1231+ parser .add_argument ("--served-model-name" ,
1232+ type = str ,
1233+ default = None ,
1234+ help = "The model name used in the API. "
1235+ "If not specified, the model name will be the "
1236+ "same as the ``--model`` argument. " )
1237+
12251238 args = parser .parse_args ()
12261239 main (args )