1 file changed: 8 additions, 10 deletions

@@ -116,21 +116,19 @@ def log(self, stats: Stats) -> None:
         self._vllm_stat_logger.log(stats)

         # Then log TGIS specific ones
-        self.tgi_queue_size.set(stats.num_waiting + stats.num_swapped)
-        self.tgi_batch_current_size.set(stats.num_running)
+        self.tgi_queue_size.set(stats.num_waiting_sys + stats.num_swapped_sys)
+        self.tgi_batch_current_size.set(stats.num_running_sys)

-        for ttft in stats.time_to_first_tokens:
+        for ttft in stats.time_to_first_tokens_iter:
             self.tgi_batch_inference_duration.labels({
                 "method": "prefill"
             }).observe(ttft)
-        for tpot in stats.time_per_output_tokens:
+        for tpot in stats.time_per_output_tokens_iter:
             self.tgi_batch_inference_duration.labels({
                 "method": "next_token"
             }).observe(tpot)

-        # These metrics depend on open PR: https://github.com/vllm-project/vllm/pull/2764
-        if hasattr(stats, "num_prompt_tokens_lst"):
-            for input_len in stats.num_prompt_tokens_lst:
-                self.tgi_request_input_length.observe(input_len)
-            for output_len in stats.num_generation_tokens_lst:
-                self.tgi_request_generated_tokens.observe(output_len)
+        for input_len in stats.num_prompt_tokens_requests:
+            self.tgi_request_input_length.observe(input_len)
+        for output_len in stats.num_generation_tokens_requests:
+            self.tgi_request_generated_tokens.observe(output_len)