
Commit f9bc64e

Compute deprecated metrics using the newer version
Deprecated metrics:
- num_prompt_tokens
- num_generation_tokens
1 parent e147575 commit f9bc64e
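The change itself is a small consolidation: instead of maintaining the deprecated scalar metrics alongside the per-request lists that supersede them, the commit keeps only the lists and computes the scalars as sum(...) at the places they are still reported. A minimal sketch of the same idea, using a simplified Stats with only the fields involved here (note the real commit inlines the sums at the call sites rather than adding properties):

    from dataclasses import dataclass, field
    from typing import List


    @dataclass
    class Stats:
        # Per-request token counts gathered each iteration (the newer fields).
        num_prompt_tokens_lst: List[int] = field(default_factory=list)
        num_generation_tokens_lst: List[int] = field(default_factory=list)

        # Deprecated aggregates are derived on demand, so the scalar and the
        # list it summarizes can never drift apart.
        @property
        def num_prompt_tokens(self) -> int:
            return sum(self.num_prompt_tokens_lst)

        @property
        def num_generation_tokens(self) -> int:
            return sum(self.num_generation_tokens_lst)


    stats = Stats(num_prompt_tokens_lst=[128, 64],
                  num_generation_tokens_lst=[8, 8])
    assert stats.num_prompt_tokens == 192
    assert stats.num_generation_tokens == 16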

File tree

2 files changed: +4 −16 lines changed

vllm/engine/llm_engine.py

Lines changed: 0 additions & 10 deletions
@@ -574,8 +574,6 @@ def _get_stats(self,
         num_waiting = len(self.scheduler.waiting)

         # Iteration stats if we have scheduler output.
-        num_prompt_tokens = 0
-        num_generation_tokens = 0
         num_prompt_tokens_lst = []
         num_generation_tokens_lst = []
         request_n = []
@@ -594,13 +592,7 @@ def _get_stats(self,
                 for scheduled_seq_group in
                 scheduler_outputs.scheduled_seq_groups
             ]
-            num_prompt_tokens = sum(num_prompt_tokens_lst)
-            num_generation_tokens = sum(
-                scheduled_seq_group.seq_group.num_seqs()
-                for scheduled_seq_group in
-                scheduler_outputs.scheduled_seq_groups)
         else:
-            num_generation_tokens = scheduler_outputs.num_batched_tokens
             num_generation_tokens_lst = [
                 seq.get_output_len() for scheduled_seq_group in
                 scheduler_outputs.scheduled_seq_groups for seq in
@@ -651,8 +643,6 @@ def _get_stats(self,
             gpu_cache_usage=gpu_cache_usage,
             cpu_cache_usage=cpu_cache_usage,
             finished_reason_lst=finished_reason_lst,
-            num_prompt_tokens=num_prompt_tokens,
-            num_generation_tokens=num_generation_tokens,
             num_prompt_tokens_lst=num_prompt_tokens_lst,
             num_generation_tokens_lst=num_generation_tokens_lst,
             request_n=request_n,

vllm/engine/metrics.py

Lines changed: 4 additions & 6 deletions
@@ -168,8 +168,6 @@ class Stats:

     # Raw stats from last model iteration.
     finished_reason_lst: List[str]
-    num_prompt_tokens: int
-    num_generation_tokens: int
     num_prompt_tokens_lst: List[int]
     num_generation_tokens_lst: List[int]
     request_n: List[int]
@@ -229,9 +227,9 @@ def _log_prometheus(self, stats: Stats) -> None:

         # Add to token counters.
         self.metrics.counter_prompt_tokens.labels(**self.labels).inc(
-            stats.num_prompt_tokens)
+            sum(stats.num_prompt_tokens_lst))
         self.metrics.counter_generation_tokens.labels(**self.labels).inc(
-            stats.num_generation_tokens)
+            sum(stats.num_generation_tokens_lst))

         # Add to request counters.
         finished_reason_counter = CollectionsCounter(stats.finished_reason_lst)
@@ -291,8 +289,8 @@ def log(self, stats: Stats) -> None:
         self._log_prometheus(stats)

         # Save tracked stats for token counters.
-        self.num_prompt_tokens.append(stats.num_prompt_tokens)
-        self.num_generation_tokens.append(stats.num_generation_tokens)
+        self.num_prompt_tokens.append(sum(stats.num_prompt_tokens_lst))
+        self.num_generation_tokens.append(sum(stats.num_generation_tokens_lst))

         # Log locally every local_interval seconds.
         if self._local_interval_elapsed(stats.now):
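The counter updates above follow the usual prometheus_client pattern: a labeled Counter is incremented by the per-iteration total, which is now sum(...) over the list field. A self-contained sketch of that pattern; the metric name, documentation string, and label set below are illustrative stand-ins, not vLLM's actual definitions:

    from prometheus_client import Counter

    # Illustrative stand-in for vLLM's counter_prompt_tokens; the real
    # metric name and labels live in vllm/engine/metrics.py.
    counter_prompt_tokens = Counter(
        "prompt_tokens_total",
        "Cumulative number of prompt tokens processed.",
        labelnames=["model_name"],
    )

    labels = {"model_name": "example-model"}
    num_prompt_tokens_lst = [128, 64, 32]

    # As in _log_prometheus: increment the labeled counter by the sum of
    # the per-request list instead of a separately tracked scalar.
    counter_prompt_tokens.labels(**labels).inc(sum(num_prompt_tokens_lst))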
