
Commit 333306a

add benchmark for fix length input and output (#5857)
Co-authored-by: Roger Wang <[email protected]>
1 parent 6206dcb commit 333306a

File tree

1 file changed (+60, -5)

benchmarks/benchmark_serving.py

Lines changed: 60 additions & 5 deletions
@@ -17,7 +17,7 @@
         --dataset-path <path to dataset> \
         --request-rate <request_rate> \ # By default <request_rate> is inf
         --num-prompts <num_prompts> # By default <num_prompts> is 1000
-
+
     when using tgi backend, add
         --endpoint /generate_stream
     to the end of the command above.
@@ -77,7 +77,6 @@ def sample_sharegpt_requests(
 ) -> List[Tuple[str, int, int]]:
     if fixed_output_len is not None and fixed_output_len < 4:
         raise ValueError("output_len too small")
-
     # Load the dataset.
     with open(dataset_path) as f:
         dataset = json.load(f)
@@ -185,6 +184,31 @@ def sample_sonnet_requests(
     return sampled_requests
 
 
+def sample_random_requests(
+        input_len: int, output_len: int, num_prompts: int, range_ratio: float,
+        tokenizer: PreTrainedTokenizerBase) -> List[Tuple[str, int, int]]:
+
+    input_lens = np.random.randint(
+        int(input_len * range_ratio),
+        input_len + 1,
+        size=num_prompts,
+    )
+    output_lens = np.random.randint(
+        int(output_len * range_ratio),
+        output_len + 1,
+        size=num_prompts,
+    )
+    offsets = np.random.randint(0, tokenizer.vocab_size, size=num_prompts)
+    input_requests = []
+    for i in range(num_prompts):
+        prompt = tokenizer.decode([(offsets[i] + i + j) % tokenizer.vocab_size
+                                   for j in range(input_lens[i])])
+        input_requests.append(
+            (prompt, int(input_lens[i]), int(output_lens[i])))
+
+    return input_requests
+
+
 async def get_request(
     input_requests: List[Tuple[str, int, int]],
     request_rate: float,
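For reference, the sampler added above draws each request's input and output length uniformly from [int(len * range_ratio), len] and builds a synthetic prompt by decoding a run of consecutive token ids starting at a per-request random offset. Below is a minimal standalone sketch of the same idea; the helper name make_random_requests, the gpt2 tokenizer, and the toy lengths are illustrative assumptions, not part of this commit.

# Standalone sketch of the random-length request sampling shown in the diff.
# Assumes numpy and transformers are installed; "gpt2" is only an example.
from typing import List, Tuple

import numpy as np
from transformers import AutoTokenizer, PreTrainedTokenizerBase


def make_random_requests(
        input_len: int, output_len: int, num_prompts: int, range_ratio: float,
        tokenizer: PreTrainedTokenizerBase) -> List[Tuple[str, int, int]]:
    # Lengths are sampled uniformly from [int(len * range_ratio), len].
    input_lens = np.random.randint(int(input_len * range_ratio),
                                   input_len + 1, size=num_prompts)
    output_lens = np.random.randint(int(output_len * range_ratio),
                                    output_len + 1, size=num_prompts)
    # A random vocabulary offset per request keeps prompts distinct.
    offsets = np.random.randint(0, tokenizer.vocab_size, size=num_prompts)
    requests = []
    for i in range(num_prompts):
        token_ids = [int((offsets[i] + i + j) % tokenizer.vocab_size)
                     for j in range(input_lens[i])]
        requests.append((tokenizer.decode(token_ids), int(input_lens[i]),
                         int(output_lens[i])))
    return requests


if __name__ == "__main__":
    tok = AutoTokenizer.from_pretrained("gpt2")  # illustrative tokenizer
    for prompt, in_len, out_len in make_random_requests(32, 16, 3, 0.5, tok):
        print(in_len, out_len, repr(prompt[:40]))

Decoding arbitrary token ids produces gibberish text, which is acceptable for load testing; note that the decoded prompt may re-tokenize to a slightly different length than the sampled input_lens[i].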
@@ -196,6 +220,7 @@ async def get_request(
         if request_rate == float("inf"):
             # If the request rate is infinity, then we don't need to wait.
             continue
+
         # Sample the request interval from the exponential distribution.
         interval = np.random.exponential(1.0 / request_rate)
         # The next request will be sent after the interval.
@@ -219,7 +244,7 @@ def calculate_metrics(
             # We use the tokenizer to count the number of output tokens for all
             # serving backends instead of looking at len(outputs[i].itl) since
             # multiple output tokens may be bundled together
-            # Note: this may inflate the output token count slightly
+            # Note : this may inflate the output token count slightly
             output_len = len(
                 tokenizer(outputs[i].generated_text,
                           add_special_tokens=False).input_ids)
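As the touched comment explains, calculate_metrics recounts output tokens by re-tokenizing the generated text, since a single streamed chunk can bundle several tokens. A tiny illustration of that counting pattern follows; the gpt2 tokenizer and sample string are assumptions for the example, not part of the commit.

# Counting output tokens from the final text, as calculate_metrics does.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("gpt2")  # illustrative tokenizer
generated_text = "Hello world, this is a generated completion."
output_len = len(tok(generated_text, add_special_tokens=False).input_ids)
print(output_len)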
@@ -456,6 +481,15 @@ def main(args: argparse.Namespace):
                           for prompt, prompt_formatted, prompt_len,
                           output_len in input_requests]
 
+    elif args.dataset_name == "random":
+        input_requests = sample_random_requests(
+            input_len=args.random_input_len,
+            output_len=args.random_output_len,
+            num_prompts=args.num_prompts,
+            range_ratio=args.random_range_ratio,
+            tokenizer=tokenizer,
+        )
+
     else:
         raise ValueError(f"Unknown dataset: {args.dataset_name}")
 
@@ -549,7 +583,7 @@ def main(args: argparse.Namespace):
         "--dataset-name",
         type=str,
         default="sharegpt",
-        choices=["sharegpt", "sonnet"],
+        choices=["sharegpt", "sonnet", "random"],
         help="Name of the dataset to benchmark on.",
     )
     parser.add_argument("--dataset-path",
@@ -566,7 +600,7 @@ def main(args: argparse.Namespace):
         "--tokenizer",
         type=str,
         help=
-        "Name or path of the tokenizer, if not using the default tokenizer.",
+        "Name or path of the tokenizer, if not using the default tokenizer.",  # noqa: E501
     )
     parser.add_argument(
         "--best-of",
@@ -609,6 +643,27 @@ def main(args: argparse.Namespace):
         help=
         "Number of prefix tokens per request, used only for sonnet dataset.",
     )
+    parser.add_argument(
+        "--random-input-len",
+        type=int,
+        default=1024,
+        help=
+        "Number of input tokens per request, used only for random sampling.",
+    )
+    parser.add_argument(
+        "--random-output-len",
+        type=int,
+        default=128,
+        help=
+        "Number of output tokens per request, used only for random sampling.",
+    )
+    parser.add_argument(
+        "--random-range-ratio",
+        type=float,
+        default=1.0,
+        help="Range of sampled ratio of input/output length, "
+        "used only for random sampling.",
+    )
     parser.add_argument(
         "--request-rate",
         type=float,
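With the defaults added above (--random-input-len 1024, --random-output-len 128, --random-range-ratio 1.0), the sampling window collapses to a single value, so every request gets exactly 1024 input tokens and 128 output tokens, matching the "fixed length" case in the commit title; lowering the ratio widens the window. A quick sketch of the bounds implied by the flags (values here are only examples):

# Length bounds implied by --random-*-len and --random-range-ratio.
def length_bounds(target_len: int, range_ratio: float) -> tuple:
    # The sampler calls randint(low, target_len + 1), so the range is inclusive.
    return int(target_len * range_ratio), target_len


print(length_bounds(1024, 1.0))  # (1024, 1024): fixed-length requests
print(length_bounds(1024, 0.5))  # (512, 1024): lengths vary per request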
