
Commit f9bc64e

Compute deprecated metrics using the newer version
Deprecated metrics:
- num_prompt_tokens
- num_generation_tokens
1 parent e147575 commit f9bc64e
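The change itself is a small consolidation: instead of maintaining the deprecated scalar metrics alongside the per-request lists that supersede them, the commit keeps only the lists and computes the scalars as sum(...) at the places they are still reported. A minimal sketch of the same idea, using a simplified Stats with only the fields involved here (note the real commit inlines the sums at the call sites rather than adding properties):

    from dataclasses import dataclass, field
    from typing import List


    @dataclass
    class Stats:
        # Per-request token counts gathered each iteration (the newer fields).
        num_prompt_tokens_lst: List[int] = field(default_factory=list)
        num_generation_tokens_lst: List[int] = field(default_factory=list)

        # Deprecated aggregates are derived on demand, so the scalar and the
        # list it summarizes can never drift apart.
        @property
        def num_prompt_tokens(self) -> int:
            return sum(self.num_prompt_tokens_lst)

        @property
        def num_generation_tokens(self) -> int:
            return sum(self.num_generation_tokens_lst)


    stats = Stats(num_prompt_tokens_lst=[128, 64],
                  num_generation_tokens_lst=[8, 8])
    assert stats.num_prompt_tokens == 192
    assert stats.num_generation_tokens == 16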

File tree

2 files changed: +4 −16 lines changed

vllm/engine/llm_engine.py

Lines changed: 0 additions & 10 deletions
@@ -574,8 +574,6 @@ def _get_stats(self,
         num_waiting = len(self.scheduler.waiting)

         # Iteration stats if we have scheduler output.
-        num_prompt_tokens = 0
-        num_generation_tokens = 0
         num_prompt_tokens_lst = []
         num_generation_tokens_lst = []
         request_n = []
@@ -594,13 +592,7 @@ def _get_stats(self,
                 for scheduled_seq_group in
                 scheduler_outputs.scheduled_seq_groups
             ]
-            num_prompt_tokens = sum(num_prompt_tokens_lst)
-            num_generation_tokens = sum(
-                scheduled_seq_group.seq_group.num_seqs()
-                for scheduled_seq_group in
-                scheduler_outputs.scheduled_seq_groups)
         else:
-            num_generation_tokens = scheduler_outputs.num_batched_tokens
             num_generation_tokens_lst = [
                 seq.get_output_len() for scheduled_seq_group in
                 scheduler_outputs.scheduled_seq_groups for seq in
@@ -651,8 +643,6 @@ def _get_stats(self,
             gpu_cache_usage=gpu_cache_usage,
             cpu_cache_usage=cpu_cache_usage,
             finished_reason_lst=finished_reason_lst,
-            num_prompt_tokens=num_prompt_tokens,
-            num_generation_tokens=num_generation_tokens,
             num_prompt_tokens_lst=num_prompt_tokens_lst,
             num_generation_tokens_lst=num_generation_tokens_lst,
             request_n=request_n,

vllm/engine/metrics.py

Lines changed: 4 additions & 6 deletions
@@ -168,8 +168,6 @@ class Stats:

     # Raw stats from last model iteration.
     finished_reason_lst: List[str]
-    num_prompt_tokens: int
-    num_generation_tokens: int
     num_prompt_tokens_lst: List[int]
     num_generation_tokens_lst: List[int]
     request_n: List[int]
@@ -229,9 +227,9 @@ def _log_prometheus(self, stats: Stats) -> None:

         # Add to token counters.
         self.metrics.counter_prompt_tokens.labels(**self.labels).inc(
-            stats.num_prompt_tokens)
+            sum(stats.num_prompt_tokens_lst))
         self.metrics.counter_generation_tokens.labels(**self.labels).inc(
-            stats.num_generation_tokens)
+            sum(stats.num_generation_tokens_lst))

         # Add to request counters.
         finished_reason_counter = CollectionsCounter(stats.finished_reason_lst)
@@ -291,8 +289,8 @@ def log(self, stats: Stats) -> None:
         self._log_prometheus(stats)

         # Save tracked stats for token counters.
-        self.num_prompt_tokens.append(stats.num_prompt_tokens)
-        self.num_generation_tokens.append(stats.num_generation_tokens)
+        self.num_prompt_tokens.append(sum(stats.num_prompt_tokens_lst))
+        self.num_generation_tokens.append(sum(stats.num_generation_tokens_lst))

         # Log locally every local_interval seconds.
         if self._local_interval_elapsed(stats.now):
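The counter updates above follow the usual prometheus_client pattern: a labeled Counter is incremented by the per-iteration total, which is now sum(...) over the list field. A self-contained sketch of that pattern; the metric name, documentation string, and label set below are illustrative stand-ins, not vLLM's actual definitions:

    from prometheus_client import Counter

    # Illustrative stand-in for vLLM's counter_prompt_tokens; the real
    # metric name and labels live in vllm/engine/metrics.py.
    counter_prompt_tokens = Counter(
        "prompt_tokens_total",
        "Cumulative number of prompt tokens processed.",
        labelnames=["model_name"],
    )

    labels = {"model_name": "example-model"}
    num_prompt_tokens_lst = [128, 64, 32]

    # As in _log_prometheus: increment the labeled counter by the sum of
    # the per-request list instead of a separately tracked scalar.
    counter_prompt_tokens.labels(**labels).inc(sum(num_prompt_tokens_lst))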
