 --dataset-path <path to dataset> \
 --request-rate <request_rate> \ # By default <request_rate> is inf
 --num-prompts <num_prompts> # By default <num_prompts> is 1000
-
+
 when using tgi backend, add
 --endpoint /generate_stream
 to the end of the command above.
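For orientation, a complete serving-benchmark command built from the flags above might look like the sketch below. The script path and the --backend/--model flags are assumptions (the beginning of the command is not shown in this excerpt); the remaining flags are taken from the README lines above, with the tgi-specific endpoint appended as described:

    python benchmarks/benchmark_serving.py \
        --backend tgi \
        --model <your_model> \
        --dataset-path <path to dataset> \
        --request-rate <request_rate> \
        --num-prompts <num_prompts> \
        --endpoint /generate_stream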
@@ -77,7 +77,6 @@ def sample_sharegpt_requests(
 ) -> List[Tuple[str, int, int]]:
     if fixed_output_len is not None and fixed_output_len < 4:
         raise ValueError("output_len too small")
-
     # Load the dataset.
     with open(dataset_path) as f:
         dataset = json.load(f)
@@ -185,6 +184,31 @@ def sample_sonnet_requests(
     return sampled_requests


+def sample_random_requests(
+        input_len: int, output_len: int, num_prompts: int, range_ratio: float,
+        tokenizer: PreTrainedTokenizerBase) -> List[Tuple[str, int, int]]:
+
+    input_lens = np.random.randint(
+        int(input_len * range_ratio),
+        input_len + 1,
+        size=num_prompts,
+    )
+    output_lens = np.random.randint(
+        int(output_len * range_ratio),
+        output_len + 1,
+        size=num_prompts,
+    )
+    offsets = np.random.randint(0, tokenizer.vocab_size, size=num_prompts)
+    input_requests = []
+    for i in range(num_prompts):
+        prompt = tokenizer.decode([(offsets[i] + i + j) % tokenizer.vocab_size
+                                   for j in range(input_lens[i])])
+        input_requests.append(
+            (prompt, int(input_lens[i]), int(output_lens[i])))
+
+    return input_requests
+
+
 async def get_request(
     input_requests: List[Tuple[str, int, int]],
     request_rate: float,
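The sample_random_requests added above draws each request's input and output lengths uniformly from [length * range_ratio, length] and synthesizes every prompt by decoding arbitrary in-vocabulary token ids, so no dataset file is required. A minimal usage sketch is shown below; the module name in the import and the choice of the gpt2 tokenizer are assumptions for illustration only:

    import numpy as np
    from transformers import AutoTokenizer

    from benchmark_serving import sample_random_requests  # assumed module name

    tokenizer = AutoTokenizer.from_pretrained("gpt2")  # assumption: any HF tokenizer works
    np.random.seed(0)  # make sampled lengths and offsets reproducible

    requests = sample_random_requests(
        input_len=1024,
        output_len=128,
        num_prompts=4,
        range_ratio=0.5,  # input lens fall in [512, 1024], output lens in [64, 128]
        tokenizer=tokenizer,
    )
    for prompt, prompt_len, output_len in requests:
        print(prompt_len, output_len, prompt[:40])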
@@ -196,6 +220,7 @@ async def get_request(
         if request_rate == float("inf"):
             # If the request rate is infinity, then we don't need to wait.
             continue
+
         # Sample the request interval from the exponential distribution.
         interval = np.random.exponential(1.0 / request_rate)
         # The next request will be sent after the interval.
@@ -219,7 +244,7 @@ def calculate_metrics(
            # We use the tokenizer to count the number of output tokens for all
            # serving backends instead of looking at len(outputs[i].itl) since
            # multiple output tokens may be bundled together
-            # Note: this may inflate the output token count slightly
+            # Note : this may inflate the output token count slightly
             output_len = len(
                 tokenizer(outputs[i].generated_text,
                           add_special_tokens=False).input_ids)
@@ -456,6 +481,15 @@ def main(args: argparse.Namespace):
                           for prompt, prompt_formatted, prompt_len,
                           output_len in input_requests]

+    elif args.dataset_name == "random":
+        input_requests = sample_random_requests(
+            input_len=args.random_input_len,
+            output_len=args.random_output_len,
+            num_prompts=args.num_prompts,
+            range_ratio=args.random_range_ratio,
+            tokenizer=tokenizer,
+        )
+
     else:
         raise ValueError(f"Unknown dataset: {args.dataset_name}")

@@ -549,7 +583,7 @@ def main(args: argparse.Namespace):
         "--dataset-name",
         type=str,
         default="sharegpt",
-        choices=["sharegpt", "sonnet"],
+        choices=["sharegpt", "sonnet", "random"],
         help="Name of the dataset to benchmark on.",
     )
     parser.add_argument("--dataset-path",
@@ -566,7 +600,7 @@ def main(args: argparse.Namespace):
         "--tokenizer",
         type=str,
         help=
-        "Name or path of the tokenizer, if not using the default tokenizer.",
+        "Name or path of the tokenizer, if not using the default tokenizer.",  # noqa: E501
     )
     parser.add_argument(
         "--best-of",
@@ -609,6 +643,27 @@ def main(args: argparse.Namespace):
         help=
         "Number of prefix tokens per request, used only for sonnet dataset.",
     )
+    parser.add_argument(
+        "--random-input-len",
+        type=int,
+        default=1024,
+        help=
+        "Number of input tokens per request, used only for random sampling.",
+    )
+    parser.add_argument(
+        "--random-output-len",
+        type=int,
+        default=128,
+        help=
+        "Number of output tokens per request, used only for random sampling.",
+    )
+    parser.add_argument(
+        "--random-range-ratio",
+        type=float,
+        default=1.0,
+        help="Range of sampled ratio of input/output length, "
+        "used only for random sampling.",
+    )
     parser.add_argument(
         "--request-rate",
         type=float,
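With the flags and the random branch added above, the new dataset can be exercised without any dataset file, since the prompts are synthesized from random token ids. A hedged example invocation, where the script path and the --backend/--model flags are assumptions not shown in this diff and the random-* values are the defaults defined above:

    python benchmarks/benchmark_serving.py \
        --backend vllm \
        --model <your_model> \
        --dataset-name random \
        --random-input-len 1024 \
        --random-output-len 128 \
        --random-range-ratio 1.0 \
        --num-prompts 1000

Note that --dataset-path is not needed here, because sample_random_requests does not read from disk.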